Merged
Changes from 1 commit
Commits
29 commits
c78ae33
feat: 🚀 Added Non-Maximum Merging to Detections
Oct 13, 2023
57b12e6
Added __setitem__ to Detections and refactored the object prediction …
Oct 18, 2023
9f22273
Added standard full image inference after sliced inference to increas…
Oct 18, 2023
6f47046
Refactored merging of Detection attributes to better work with np.nda…
Oct 18, 2023
5f0dcc2
Merge branch 'develop' into add_nmm_to_detections to resolve conflicts
Apr 9, 2024
166a8da
Implement Feedback
Apr 11, 2024
b159873
Merge remote-tracking branch 'upstream/develop' into add_nmm_to_detec…
May 6, 2024
d7e52be
NMM: Add None-checks, fix area normalization, style
May 6, 2024
bee3252
fix(pre_commit): 🎨 auto format pre-commit hooks
pre-commit-ci[bot] May 6, 2024
97c4071
NMM: Move detections merge into Detections class.
May 6, 2024
204669b
fix(pre_commit): 🎨 auto format pre-commit hooks
pre-commit-ci[bot] May 6, 2024
2eb0c7c
Merge remote-tracking branch 'upstream/develop' into add_nmm_to_detec…
LinasKo May 14, 2024
c3b77d0
Rename, remove functions, unit-test & change `merge_object_detection_…
May 14, 2024
8014e88
Test box_non_max_merge
May 14, 2024
26bafec
Test box_non_max_merge, rename threshold,to __init__
May 15, 2024
d2d50fb
renamed bbox -> xyxy
May 15, 2024
2d740bd
fix: merge_object_detection_pair
May 15, 2024
145b5fe
Rename to batch_box_non_max_merge to box_non_max_merge_batch
May 15, 2024
6c40935
box_non_max_merge: use our functions to compute iou
May 15, 2024
53f345e
Minor renaming
May 15, 2024
0e2eec0
Revert np.bool comparisons with `is`
May 15, 2024
559ef90
Simplify box_non_max_merge
May 15, 2024
f8f3647
Removed suprplus NMM code for 20% speedup
May 15, 2024
9024396
Add npt.NDarray[x] types, remove resolution_wh default val
May 17, 2024
6fbca83
Address review comments, simplify merge
May 23, 2024
db1b473
fix(pre_commit): 🎨 auto format pre-commit hooks
pre-commit-ci[bot] May 23, 2024
0721bc2
Remove _set_at_index
May 23, 2024
530e1d0
Address comments
May 27, 2024
2ee9e08
Renamed to group_overlapping_boxes
May 27, 2024
NMM: Move detections merge into Detections class.
* No other changes!
Linas Kondrackis committed May 6, 2024
commit 97c407101a2755db3288613c97cbbcda4e8105c0
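For orientation, here is a minimal sketch of how the `with_nmm` entry point this branch works on might be called once merged. It assumes the `threshold` and `class_agnostic` parameters visible in the diff below and builds `Detections` directly from NumPy arrays; the boxes and scores are toy values, not taken from the PR.

```python
import numpy as np
import supervision as sv

# Two heavily overlapping boxes of the same class, e.g. from sliced inference.
detections = sv.Detections(
    xyxy=np.array([[10.0, 10.0, 100.0, 100.0], [15.0, 15.0, 105.0, 105.0]]),
    confidence=np.array([0.9, 0.8]),
    class_id=np.array([0, 0]),
)

# Boxes whose IoU exceeds the threshold are merged into a single spanning box
# with an area-weighted confidence, instead of the weaker box being dropped
# as it would be under NMS.
merged = detections.with_nmm(threshold=0.5, class_agnostic=False)
print(len(merged))  # expected: 1
```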
251 changes: 135 additions & 116 deletions supervision/detection/core.py
@@ -27,103 +27,6 @@
from supervision.validators import validate_detections_fields


def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections:
"""
Merges two Detections objects into a single Detections object.
Assumes each Detections contains exactly one object.

A `winning` detection is determined based on the confidence score of the two
input detections. This winning detection is then used to specify which
`class_id`, `tracker_id`, and `data` to include in the merged Detections object.

The resulting `confidence` of the merged object is the area-weighted average
of the two input detections' confidence scores.
The bounding boxes and masks of the two input detections are merged into a
single bounding box and mask, respectively.

Args:
det1 (Detections):
The first Detections object
det2 (Detections):
The second Detections object

Returns:
Detections: A new Detections object, with merged attributes.

Raises:
ValueError: If the input Detections objects do not have exactly 1 detected
object.

Example:
```python
import cv2
import supervision as sv
from inference import get_model

image = cv2.imread(<SOURCE_IMAGE_PATH>)
model = get_model(model_id="yolov8s-640")

result = model.infer(image)[0]
detections = sv.Detections.from_inference(result)

merged_detections = _merge_object_detection_pair(
detections[0], detections[1])
```
"""
if len(det1) != 1 or len(det2) != 1:
raise ValueError("Both Detections should have exactly 1 detected object.")

if det2.confidence is None:
winning_det = det1
elif det1.confidence is None:
winning_det = det2
elif det1.confidence[0] >= det2.confidence[0]:
winning_det = det1
else:
winning_det = det2

area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * (
det1.xyxy[0][3] - det1.xyxy[0][1]
)
area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * (
det2.xyxy[0][3] - det2.xyxy[0][1]
)

merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2])
merged_x2, merged_y2 = np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:])

merged_xy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]])

winning_class_id = winning_det.class_id

if det1.confidence is None or det2.confidence is None:
merged_confidence = None
else:
merged_confidence = (
area_det1 * det1.confidence[0] + area_det2 * det2.confidence[0]
) / (area_det1 + area_det2)
merged_confidence = np.array([merged_confidence])

merged_mask = None
if det1.mask is not None and det2.mask is not None:
merged_mask = np.logical_or(det1.mask, det2.mask)

winning_tracker_id = winning_det.tracker_id

winning_data = None
if det1.data and det2.data:
winning_data = winning_det.data

return Detections(
xyxy=merged_xy,
mask=merged_mask,
confidence=merged_confidence,
class_id=winning_class_id,
tracker_id=winning_tracker_id,
data=winning_data,
)


@dataclass
class Detections:
"""
@@ -295,7 +198,8 @@ def from_yolov5(cls, yolov5_results) -> Detections:
detections = sv.Detections.from_yolov5(result)
```
"""
yolov5_detections_predictions = yolov5_results.pred[0].cpu().cpu().numpy()
yolov5_detections_predictions = yolov5_results.pred[0].cpu(
).cpu().numpy()

return cls(
xyxy=yolov5_detections_predictions[:, :4],
@@ -342,7 +246,8 @@ def from_ultralytics(cls, ultralytics_results) -> Detections:

if "obb" in ultralytics_results and ultralytics_results.obb is not None:
class_id = ultralytics_results.obb.cls.cpu().numpy().astype(int)
class_names = np.array([ultralytics_results.names[i] for i in class_id])
class_names = np.array(
[ultralytics_results.names[i] for i in class_id])
oriented_box_coordinates = ultralytics_results.obb.xyxyxyxy.cpu().numpy()
return cls(
xyxy=ultralytics_results.obb.xyxy.cpu().numpy(),
@@ -358,7 +263,8 @@ def from_ultralytics(cls, ultralytics_results) -> Detections:
)

class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)
class_names = np.array([ultralytics_results.names[i] for i in class_id])
class_names = np.array([ultralytics_results.names[i]
for i in class_id])
return cls(
xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(),
confidence=ultralytics_results.boxes.conf.cpu().numpy(),
@@ -446,7 +352,8 @@ def from_tensorflow(
return cls(
xyxy=boxes,
confidence=tensorflow_results["detection_scores"][0].numpy(),
class_id=tensorflow_results["detection_classes"][0].numpy().astype(int),
class_id=tensorflow_results["detection_classes"][0].numpy().astype(
int),
)

@classmethod
@@ -483,7 +390,8 @@ def from_deepsparse(cls, deepsparse_results) -> Detections:
return cls(
xyxy=np.array(deepsparse_results.boxes[0]),
confidence=np.array(deepsparse_results.scores[0]),
class_id=np.array(deepsparse_results.labels[0]).astype(float).astype(int),
class_id=np.array(deepsparse_results.labels[0]).astype(
float).astype(int),
)

@classmethod
@@ -570,24 +478,29 @@ class names. If provided, the resulting Detections object will contain
Class names values can be accessed using `detections["class_name"]`.
""" # noqa: E501 // docs

class_ids = transformers_results["labels"].cpu().detach().numpy().astype(int)
class_ids = transformers_results["labels"].cpu(
).detach().numpy().astype(int)
data = {}
if id2label is not None:
class_names = np.array([id2label[class_id] for class_id in class_ids])
class_names = np.array([id2label[class_id]
for class_id in class_ids])
data[CLASS_NAME_DATA_FIELD] = class_names
if "boxes" in transformers_results:
return cls(
xyxy=transformers_results["boxes"].cpu().detach().numpy(),
confidence=transformers_results["scores"].cpu().detach().numpy(),
confidence=transformers_results["scores"].cpu(
).detach().numpy(),
class_id=class_ids,
data=data,
)
elif "masks" in transformers_results:
masks = transformers_results["masks"].cpu().detach().numpy().astype(bool)
masks = transformers_results["masks"].cpu(
).detach().numpy().astype(bool)
return cls(
xyxy=mask_to_xyxy(masks),
mask=masks,
confidence=transformers_results["scores"].cpu().detach().numpy(),
confidence=transformers_results["scores"].cpu(
).detach().numpy(),
class_id=class_ids,
data=data,
)
@@ -630,7 +543,8 @@ class IDs, and confidences of the predictions.
"""

return cls(
xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu().numpy(),
xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu(
).numpy(),
confidence=detectron2_results["instances"].scores.cpu().numpy(),
class_id=detectron2_results["instances"]
.pred_classes.cpu()
@@ -673,7 +587,8 @@ def from_inference(cls, roboflow_result: Union[dict, Any]) -> Detections:
Class names values can be accessed using `detections["class_name"]`.
"""
with suppress(AttributeError):
roboflow_result = roboflow_result.dict(exclude_none=True, by_alias=True)
roboflow_result = roboflow_result.dict(
exclude_none=True, by_alias=True)
xyxy, confidence, class_id, masks, trackers, data = process_roboflow_result(
roboflow_result=roboflow_result
)
@@ -765,7 +680,8 @@ def from_sam(cls, sam_result: List[dict]) -> Detections:
)

xywh = np.array([mask["bbox"] for mask in sorted_generated_masks])
mask = np.array([mask["segmentation"] for mask in sorted_generated_masks])
mask = np.array([mask["segmentation"]
for mask in sorted_generated_masks])

if np.asarray(xywh).shape[0] == 0:
return cls.empty()
@@ -992,7 +908,8 @@ def stack_or_none(name: str):
if all(d.__getattribute__(name) is None for d in detections_list):
return None
if any(d.__getattribute__(name) is None for d in detections_list):
raise ValueError(f"All or none of the '{name}' fields must be None")
raise ValueError(
f"All or none of the '{name}' fields must be None")
return (
np.vstack([d.__getattribute__(name) for d in detections_list])
if name == "mask"
@@ -1178,7 +1095,8 @@ def _set_at_index(self, index: int, other: Detections):
ValueError: If `other` is not made of exactly one element.
"""
if len(other) != 1:
raise ValueError("Detection to set from must have exactly one element.")
raise ValueError(
"Detection to set from must have exactly one element.")

self.xyxy[index] = other.xyxy[0]
if self.mask is not None and other.mask is not None:
@@ -1250,7 +1168,8 @@ def with_nms(
), "Detections confidence must be given for NMS to be executed."

if class_agnostic:
predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
predictions = np.hstack(
(self.xyxy, self.confidence.reshape(-1, 1)))
else:
assert self.class_id is not None, (
"Detections class_id must be given for NMS to be executed. If you"
@@ -1306,7 +1225,8 @@ def with_nmm(
), "Detections confidence must be given for NMM to be executed."

if class_agnostic:
predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
predictions = np.hstack(
(self.xyxy, self.confidence.reshape(-1, 1)))
keep_to_merge_list = non_max_merge(predictions, threshold)
else:
assert self.class_id is not None, (
@@ -1325,12 +1245,111 @@ def with_nmm(
result = []
for keep_ind, merge_ind_list in keep_to_merge_list.items():
for merge_ind in merge_ind_list:
box_iou = box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy)[0]
box_iou = box_iou_batch(
self[keep_ind].xyxy, self[merge_ind].xyxy)[0]
if box_iou > threshold:
merged_detection = _merge_object_detection_pair(
merged_detection = self._merge_object_detection_pair(
self[keep_ind], self[merge_ind]
)
self._set_at_index(keep_ind, merged_detection)
result.append(self[keep_ind])

return Detections.merge(result)

@staticmethod
def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections:
"""
Merges two Detections objects into a single Detections object.
Assumes each Detections contains exactly one object.

A `winning` detection is determined based on the confidence score of the two
input detections. This winning detection is then used to specify which
`class_id`, `tracker_id`, and `data` to include in the merged Detections object.

The resulting `confidence` of the merged object is the area-weighted average
of the two input detections' confidence scores.
The bounding boxes and masks of the two input detections are merged into a
single bounding box and mask, respectively.

Args:
det1 (Detections):
The first Detections object
det2 (Detections):
The second Detections object

Returns:
Detections: A new Detections object, with merged attributes.

Raises:
ValueError: If the input Detections objects do not have exactly 1 detected
object.

Example:
```python
import cv2
import supervision as sv
from inference import get_model

image = cv2.imread(<SOURCE_IMAGE_PATH>)
model = get_model(model_id="yolov8s-640")

result = model.infer(image)[0]
detections = sv.Detections.from_inference(result)

merged_detections = sv.Detections._merge_object_detection_pair(
detections[0], detections[1])
```
"""
if len(det1) != 1 or len(det2) != 1:
raise ValueError(
"Both Detections should have exactly 1 detected object.")

if det2.confidence is None:
winning_det = det1
elif det1.confidence is None:
winning_det = det2
elif det1.confidence[0] >= det2.confidence[0]:
winning_det = det1
else:
winning_det = det2

area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * (
det1.xyxy[0][3] - det1.xyxy[0][1]
)
area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * (
det2.xyxy[0][3] - det2.xyxy[0][1]
)

merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2])
merged_x2, merged_y2 = np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:])

merged_xy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]])

winning_class_id = winning_det.class_id

if det1.confidence is None or det2.confidence is None:
merged_confidence = None
else:
merged_confidence = (
area_det1 * det1.confidence[0] + area_det2 * det2.confidence[0]
) / (area_det1 + area_det2)
merged_confidence = np.array([merged_confidence])

merged_mask = None
if det1.mask is not None and det2.mask is not None:
merged_mask = np.logical_or(det1.mask, det2.mask)

winning_tracker_id = winning_det.tracker_id

winning_data = None
if det1.data and det2.data:
winning_data = winning_det.data

return Detections(
xyxy=merged_xy,
mask=merged_mask,
confidence=merged_confidence,
class_id=winning_class_id,
tracker_id=winning_tracker_id,
data=winning_data,
)
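To make the docstring above concrete, here is a small standalone sketch of the arithmetic the method performs on a toy pair of boxes: the merged box is the union of the two inputs, the merged confidence is their area-weighted average, and the `class_id`, `tracker_id`, and `data` of the higher-confidence ("winning") detection carry over. The numbers are illustrative only.

```python
import numpy as np

# Two single-object detections as (x1, y1, x2, y2) plus a confidence score.
xyxy_1, conf_1 = np.array([0.0, 0.0, 10.0, 10.0]), 0.9  # area 100
xyxy_2, conf_2 = np.array([5.0, 5.0, 15.0, 15.0]), 0.6  # area 100

area_1 = (xyxy_1[2] - xyxy_1[0]) * (xyxy_1[3] - xyxy_1[1])
area_2 = (xyxy_2[2] - xyxy_2[0]) * (xyxy_2[3] - xyxy_2[1])

# Union box: elementwise min of the top-left corners, max of the bottom-right.
merged_xyxy = np.concatenate(
    [np.minimum(xyxy_1[:2], xyxy_2[:2]), np.maximum(xyxy_1[2:], xyxy_2[2:])]
)

# Confidence: average of the two scores, weighted by box area.
merged_conf = (area_1 * conf_1 + area_2 * conf_2) / (area_1 + area_2)

print(merged_xyxy)  # [ 0.  0. 15. 15.]
print(merged_conf)  # 0.75
```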