From c78ae33e43c95e067e2ae34ff9e7616fe696cac3 Mon Sep 17 00:00:00 2001
From: mario-dg <mario.dagraca@gmx.com>
Date: Fri, 13 Oct 2023 18:24:22 +0200
Subject: [PATCH 01/26] =?UTF-8?q?feat:=20=F0=9F=9A=80=20Added=20Non-Maximu?=
 =?UTF-8?q?m=20Merging=20to=20Detections?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/detection/core.py                 | 107 ++++++++++
 .../detection/tools/inference_slicer.py       |  17 +-
 supervision/detection/utils.py                | 190 +++++++++++++++++-
 3 files changed, 310 insertions(+), 4 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 77bfca9da..006bc6e7e 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -6,7 +6,15 @@
 import numpy as np
 
 from supervision.detection.utils import (
+    batched_greedy_nmm,
+    box_iou_batch,
     extract_ultralytics_masks,
+    get_merged_bbox,
+    get_merged_class_id,
+    get_merged_confidence,
+    get_merged_mask,
+    get_merged_tracker_id,
+    greedy_nmm,
     non_max_suppression,
     process_roboflow_result,
     xywh_to_xyxy,
@@ -729,6 +737,105 @@ def box_area(self) -> np.ndarray:
         """
         return (self.xyxy[:, 3] - self.xyxy[:, 1]) * (self.xyxy[:, 2] - self.xyxy[:, 0])
 
+    def with_nmm(
+        self, threshold: float = 0.5, class_agnostic: bool = False
+    ) -> Detections:
+        """
+        Perform non-maximum merging on the current set of object detections.
+
+        Args:
+            threshold (float, optional): The intersection-over-union threshold
+                to use for non-maximum merging. Defaults to 0.5.
+            class_agnostic (bool, optional): Whether to perform class-agnostic
+                non-maximum merging. If True, the class_id of each detection
+                will be ignored. Defaults to False.
+
+        Returns:
+            Detections: A new Detections object containing the subset of detections
+                after non-maximum merging.
+
+        Raises:
+            AssertionError: If `confidence` is None and class_agnostic is False.
+                If `class_id` is None and class_agnostic is False.
+        """
+        if len(self) == 0:
+            return self
+
+        assert (
+            self.confidence is not None
+        ), "Detections confidence must be given for NMM to be executed."
+
+        if class_agnostic:
+            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
+            keep_to_merge_list = greedy_nmm(predictions, threshold)
+        else:
+            predictions = np.hstack(
+                (
+                    self.xyxy,
+                    self.confidence.reshape(-1, 1),
+                    self.class_id.reshape(-1, 1),
+                )
+            )
+            keep_to_merge_list = batched_greedy_nmm(predictions, threshold)
+
+        result = []
+
+        for keep_ind, merge_ind_list in keep_to_merge_list.items():
+            for merge_ind in merge_ind_list:
+                if (
+                    box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy).item()
+                    > threshold
+                ):
+                    self[keep_ind].xyxy = np.vstack(
+                        (
+                            self[keep_ind].xyxy,
+                            get_merged_bbox(self.xyxy[keep_ind], self.xyxy[merge_ind]),
+                        )
+                    )
+                    self[keep_ind].class_id = np.hstack(
+                        (
+                            self[keep_ind].class_id,
+                            get_merged_class_id(
+                                self.class_id[keep_ind].item(),
+                                self.class_id[merge_ind].item(),
+                            ),
+                        )
+                    )
+                    self[keep_ind].confidence = np.hstack(
+                        (
+                            self[keep_ind].confidence,
+                            get_merged_confidence(
+                                self.confidence[keep_ind].item(),
+                                self.confidence[merge_ind].item(),
+                            ),
+                        )
+                    )
+                    if self.mask is not None:
+                        merged_mask = get_merged_mask(
+                            self.mask[keep_ind], self.mask[merge_ind]
+                        )
+                        if self[keep_ind].mask is None:
+                            self[keep_ind].mask = np.array([merged_mask])
+                        else:
+                            self[keep_ind].mask = np.vstack(
+                                (self[keep_ind].mask, merged_mask[np.newaxis])
+                            )
+                    if self.tracker_id is not None:
+                        merged_tracker_id = get_merged_tracker_id(
+                            self.tracker_id[keep_ind].item(),
+                            self.tracker_id[merge_ind].item(),
+                        )
+                        if self[keep_ind].tracker_id is None:
+                            self[keep_ind].tracker_id = np.array(
+                                [merged_tracker_id], dtype=int
+                            )
+                        else:
+                            self[keep_ind].tracker_id = np.hstack(
+                                (self[keep_ind].tracker_id, merged_tracker_id)
+                            )
+            result.append(self[keep_ind])
+        return Detections.merge(result)
+
     def with_nms(
         self, threshold: float = 0.5, class_agnostic: bool = False
     ) -> Detections:
diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py
index 5f6fb391d..2098c79c8 100644
--- a/supervision/detection/tools/inference_slicer.py
+++ b/supervision/detection/tools/inference_slicer.py
@@ -36,6 +36,10 @@ class InferenceSlicer:
             slices in the format `(width_ratio, height_ratio)`.
         iou_threshold (Optional[float]): Intersection over Union (IoU) threshold
             used for non-max suppression.
+        merge_detections (Optional[bool]): Whether to merge the detection from all
+            slices or simply concatenate them. If `True`, Non-Maximum Merging (NMM),
+            otherwise Non-Maximum Suppression (NMS),
+            is applied to the final detections.
         callback (Callable): A function that performs inference on a given image
             slice and returns detections.
         thread_workers (int): Number of threads for parallel execution.
@@ -53,11 +57,13 @@ def __init__(
         slice_wh: Tuple[int, int] = (320, 320),
         overlap_ratio_wh: Tuple[float, float] = (0.2, 0.2),
         iou_threshold: Optional[float] = 0.5,
+        merge_detections: Optional[bool] = False,
         thread_workers: int = 1,
     ):
         self.slice_wh = slice_wh
         self.overlap_ratio_wh = overlap_ratio_wh
         self.iou_threshold = iou_threshold
+        self.merge_detections = merge_detections
         self.callback = callback
         self.thread_workers = thread_workers
         validate_inference_callback(callback=callback)
@@ -109,9 +115,14 @@ def __call__(self, image: np.ndarray) -> Detections:
             for future in as_completed(futures):
                 detections_list.append(future.result())
 
-        return Detections.merge(detections_list=detections_list).with_nms(
-            threshold=self.iou_threshold
-        )
+        if self.merge_detections:
+            return Detections.merge(detections_list=detections_list).with_nmm(
+                threshold=self.iou_threshold
+            )
+        else:
+            return Detections.merge(detections_list=detections_list).with_nms(
+                threshold=self.iou_threshold
+            )
 
     def _run_callback(self, image, offset) -> Detections:
         """
diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index 7a5eb5469..b0414eb44 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, Tuple
+from typing import Dict, List, Optional, Tuple
 
 import cv2
 import numpy as np
@@ -110,6 +110,194 @@ def non_max_suppression(
     return keep[sort_index.argsort()]
 
 
+def greedy_nmm(predictions: np.ndarray, threshold: float = 0.5) -> Dict[int, List[int]]:
+    """
+    Apply greedy version of non-maximum merging to avoid detecting too many
+    overlapping bounding boxes for a given object.
+
+    Args:
+        predictions (np.ndarray): An array of shape `(n, 5)` containing
+            the bounding boxes coordinates in format `[x1, y1, x2, y2]`
+            and the confidence scores.
+        threshold (float, optional): The intersection-over-union threshold
+            to use for non-maximum merging. Defaults to 0.5.
+
+    Returns:
+        Dict[int, List[int]]: Mapping from prediction indices
+        to keep to a list of prediction indices to be merged.
+    """
+    keep_to_merge_list = {}
+
+    x1 = predictions[:, 0]
+    y1 = predictions[:, 1]
+    x2 = predictions[:, 2]
+    y2 = predictions[:, 3]
+
+    scores = predictions[:, 4]
+
+    areas = (x2 - x1) * (y2 - y1)
+
+    order = scores.argsort()
+
+    keep = []
+
+    while len(order) > 0:
+        idx = order[-1]
+
+        keep.append(idx.tolist())
+
+        order = order[:-1]
+
+        if len(order) == 0:
+            keep_to_merge_list[idx.tolist()] = []
+            break
+
+        xx1 = np.take(x1, axis=0, indices=order)
+        xx2 = np.take(x2, axis=0, indices=order)
+        yy1 = np.take(y1, axis=0, indices=order)
+        yy2 = np.take(y2, axis=0, indices=order)
+
+        xx1 = np.maximum(xx1, x1[idx])
+        yy1 = np.maximum(yy1, y1[idx])
+        xx2 = np.minimum(xx2, x2[idx])
+        yy2 = np.minimum(yy2, y2[idx])
+
+        w = np.maximum(0.0, xx2 - xx1)
+        h = np.maximum(0.0, yy2 - yy1)
+
+        inter = w * h
+
+        rem_areas = np.take(areas, axis=0, indices=order)
+
+        union = (rem_areas - inter) + areas[idx]
+        match_metric_value = inter / union
+
+        mask = match_metric_value < threshold
+        mask = mask.astype(np.uint8)
+        matched_box_indices = np.flip(order[np.where(mask == 0)[0]])
+        unmatched_indices = order[np.where(mask == 1)[0]]
+
+        order = unmatched_indices[scores[unmatched_indices].argsort()]
+
+        keep_to_merge_list[idx.tolist()] = []
+
+        for matched_box_ind in matched_box_indices.tolist():
+            keep_to_merge_list[idx.tolist()].append(matched_box_ind)
+
+    return keep_to_merge_list
+
+
+def batched_greedy_nmm(
+    predictions: np.ndarray, threshold: float = 0.5
+) -> Dict[int, List[int]]:
+    """
+    Apply greedy version of non-maximum merging per category to avoid detecting
+    too many overlapping bounding boxes for a given object.
+
+    Args:
+        predictions (np.ndarray): An array of shape `(n, 6)` containing
+            the bounding boxes coordinates in format `[x1, y1, x2, y2]`,
+            the confidence scores and class_ids.
+        threshold (float, optional): The intersection-over-union threshold
+            to use for non-maximum merging. Defaults to 0.5.
+
+    Returns:
+        Dict[int, List[int]]: Mapping from prediction indices
+        to keep to a list of prediction indices to be merged.
+    """
+    category_ids = predictions[:, 5]
+    keep_to_merge_list = {}
+    for category_id in np.unique(category_ids):
+        curr_indices = np.where(category_ids == category_id)[0]
+        curr_keep_to_merge_list = greedy_nmm(predictions[curr_indices], threshold)
+        curr_indices_list = curr_indices.tolist()
+        for curr_keep, curr_merge_list in curr_keep_to_merge_list.items():
+            keep = curr_indices_list[curr_keep]
+            merge_list = [curr_indices_list[i] for i in curr_merge_list]
+            keep_to_merge_list[keep] = merge_list
+    return keep_to_merge_list
+
+
+def get_merged_bbox(bbox1: np.ndarray, bbox2: np.ndarray) -> np.ndarray:
+    """
+    Merges two bounding boxes into one.
+
+    Args:
+        bbox1 (np.ndarray): A numpy array of shape `(, 4)` where the
+            row corresponds to a bounding box in
+            the format `(x_min, y_min, x_max, y_max)`.
+        bbox2 (np.ndarray): A numpy array of shape `(, 4)` where the
+            row corresponds to a bounding box in
+            the format `(x_min, y_min, x_max, y_max)`.
+
+    Returns:
+        np.ndarray: A numpy array of shape `(, 4)` where the new
+            bounding box is the merged bounding box of `bbox1` and `bbox2`.
+    """
+    left_top = np.minimum(bbox1[:2], bbox2[:2])
+    right_bottom = np.maximum(bbox1[2:], bbox2[2:])
+    return np.concatenate([left_top, right_bottom])
+
+
+def get_merged_class_id(id1: int, id2: int) -> int:
+    """
+    Merges two class ids into one.
+
+    Args:
+        id1 (int): The first class id.
+        id2 (int): The second class id.
+
+    Returns:
+        int: The merged class id.
+    """
+    return max(id1, id2)
+
+
+def get_merged_confidence(confidence1: float, confidence2: float) -> float:
+    """
+    Merges two confidences into one.
+
+    Args:
+        confidence1 (float): The first confidence.
+        confidence2 (float): The second confidence.
+
+    Returns:
+        float: The merged confidence.
+    """
+    return max(confidence1, confidence2)
+
+
+def get_merged_mask(mask1: np.ndarray, mask2: np.ndarray) -> np.ndarray:
+    """
+    Merges two masks into one.
+
+    Args:
+        mask1 (np.ndarray): A numpy array of shape `(H, W)` where `H` and `W`
+            are the height and width of the mask, respectively.
+        mask2 (np.ndarray): A numpy array of shape `(H, W)` where `H` and `W`
+            are the height and width of the mask, respectively.
+
+    Returns:
+        np.ndarray: A numpy array of shape `(H, W)` where the new mask is the
+            merged mask of `mask1` and `mask2`.
+    """
+    return np.logical_or(mask1, mask2)
+
+
+def get_merged_tracker_id(tracker_id1: int, tracker_id2: int) -> int:
+    """
+    Merges two tracker ids into one.
+
+    Args:
+        tracker_id1 (int): The first tracker id.
+        tracker_id2 (int): The second tracker id.
+
+    Returns:
+        int: The merged tracker id.
+    """
+    return max(tracker_id1, tracker_id2)
+
+
 def clip_boxes(
     boxes_xyxy: np.ndarray, frame_resolution_wh: Tuple[int, int]
 ) -> np.ndarray:

From 57b12e6e00069d9064df783eaac40d230c4626bd Mon Sep 17 00:00:00 2001
From: mario-dg <mario.dagraca@gmx.com>
Date: Thu, 19 Oct 2023 00:03:36 +0200
Subject: [PATCH 02/26] Added __setitem__ to Detections and refactored the
 object prediction merging

---
 supervision/detection/core.py | 104 +++++++++++++++++++---------------
 1 file changed, 58 insertions(+), 46 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 006bc6e7e..bd729a964 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -67,6 +67,27 @@ def _validate_tracker_id(tracker_id: Any, n: int) -> None:
         raise ValueError("tracker_id must be None or 1d np.ndarray with (n,) shape")
 
 
+def _merge_object_detection_pair(pred1: Detections, pred2: Detections) -> Detections:
+    merged_bbox = get_merged_bbox(pred1.xyxy, pred2.xyxy)
+    merged_conf = get_merged_confidence(pred1.confidence, pred2.confidence)
+    merged_class_id = get_merged_class_id(pred1.class_id, pred2.class_id)
+    merged_tracker_id = None
+    merged_mask = None
+
+    if pred1.mask and pred2.mask:
+        merged_mask = get_merged_mask(pred1.mask, pred2.mask)
+    if pred1.tracker_id and pred2.tracker_id:
+        merged_tracker_id = get_merged_tracker_id(pred1.tracker_id, pred2.tracker_id)
+
+    return Detections(
+        xyxy=merged_bbox,
+        mask=merged_mask,
+        confidence=merged_conf,
+        class_id=merged_class_id,
+        tracker_id=merged_tracker_id,
+    )
+
+
 @dataclass
 class Detections:
     """
@@ -668,6 +689,38 @@ def get_anchor_coordinates(self, anchor: Position) -> np.ndarray:
 
         raise ValueError(f"{anchor} is not supported.")
 
+    def __setitem__(
+        self, index: Union[int, slice, List[int], np.ndarray], value: Detections
+    ) -> None:
+        """
+        Set a subset of the Detections object.
+
+        Args:
+            index (Union[int, slice, List[int], np.ndarray]):
+                The index or indices of the subset of the Detections
+            value (Detections): The new value of the subset of the Detections
+
+        Example:
+            ```python
+            >>> import supervision as sv
+
+            >>> detections = sv.Detections(...)
+
+            >>> detections[0] = sv.Detections(...)
+            ```
+        """
+        if isinstance(index, int):
+            index = [index]
+        self.xyxy[index] = value.xyxy
+        if self.mask is not None:
+            self.mask[index] = value.mask
+        if self.confidence is not None:
+            self.confidence[index] = value.confidence
+        if self.class_id is not None:
+            self.class_id[index] = value.class_id
+        if self.tracker_id is not None:
+            self.tracker_id[index] = value.tracker_id
+
     def __getitem__(
         self, index: Union[int, slice, List[int], np.ndarray]
     ) -> Detections:
@@ -761,6 +814,8 @@ def with_nmm(
         if len(self) == 0:
             return self
 
+        assert 0.0 <= threshold <= 1.0, "Threshold must be between 0 and 1."
+
         assert (
             self.confidence is not None
         ), "Detections confidence must be given for NMM to be executed."
@@ -786,54 +841,11 @@ def with_nmm(
                     box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy).item()
                     > threshold
                 ):
-                    self[keep_ind].xyxy = np.vstack(
-                        (
-                            self[keep_ind].xyxy,
-                            get_merged_bbox(self.xyxy[keep_ind], self.xyxy[merge_ind]),
-                        )
+                    self[keep_ind] = _merge_object_detection_pair(
+                        self[keep_ind], self[merge_ind]
                     )
-                    self[keep_ind].class_id = np.hstack(
-                        (
-                            self[keep_ind].class_id,
-                            get_merged_class_id(
-                                self.class_id[keep_ind].item(),
-                                self.class_id[merge_ind].item(),
-                            ),
-                        )
-                    )
-                    self[keep_ind].confidence = np.hstack(
-                        (
-                            self[keep_ind].confidence,
-                            get_merged_confidence(
-                                self.confidence[keep_ind].item(),
-                                self.confidence[merge_ind].item(),
-                            ),
-                        )
-                    )
-                    if self.mask is not None:
-                        merged_mask = get_merged_mask(
-                            self.mask[keep_ind], self.mask[merge_ind]
-                        )
-                        if self[keep_ind].mask is None:
-                            self[keep_ind].mask = np.array([merged_mask])
-                        else:
-                            self[keep_ind].mask = np.vstack(
-                                (self[keep_ind].mask, merged_mask[np.newaxis])
-                            )
-                    if self.tracker_id is not None:
-                        merged_tracker_id = get_merged_tracker_id(
-                            self.tracker_id[keep_ind].item(),
-                            self.tracker_id[merge_ind].item(),
-                        )
-                        if self[keep_ind].tracker_id is None:
-                            self[keep_ind].tracker_id = np.array(
-                                [merged_tracker_id], dtype=int
-                            )
-                        else:
-                            self[keep_ind].tracker_id = np.hstack(
-                                (self[keep_ind].tracker_id, merged_tracker_id)
-                            )
             result.append(self[keep_ind])
+
         return Detections.merge(result)
 
     def with_nms(

From 9f222736e129df769a9771bda12eb235795e0801 Mon Sep 17 00:00:00 2001
From: mario-dg <mario.dagraca@gmx.com>
Date: Thu, 19 Oct 2023 00:05:05 +0200
Subject: [PATCH 03/26] Added standard full image inference after sliced
 inference to increase large object detection accuracy

---
 supervision/detection/tools/inference_slicer.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py
index 2098c79c8..c0a30ff66 100644
--- a/supervision/detection/tools/inference_slicer.py
+++ b/supervision/detection/tools/inference_slicer.py
@@ -38,8 +38,10 @@ class InferenceSlicer:
             used for non-max suppression.
         merge_detections (Optional[bool]): Whether to merge the detection from all
             slices or simply concatenate them. If `True`, Non-Maximum Merging (NMM),
-            otherwise Non-Maximum Suppression (NMS),
-            is applied to the final detections.
+            otherwise Non-Maximum Suppression (NMS), is applied to the detections.
+        perform_standard_pred (Optional[bool]): Whether to perform inference on the
+            whole image in addition to the slices to increase the accuracy of
+            large object detection.
         callback (Callable): A function that performs inference on a given image
             slice and returns detections.
         thread_workers (int): Number of threads for parallel execution.
@@ -58,12 +60,14 @@ def __init__(
         overlap_ratio_wh: Tuple[float, float] = (0.2, 0.2),
         iou_threshold: Optional[float] = 0.5,
         merge_detections: Optional[bool] = False,
+        perform_standard_pred: Optional[bool] = False,
         thread_workers: int = 1,
     ):
         self.slice_wh = slice_wh
         self.overlap_ratio_wh = overlap_ratio_wh
         self.iou_threshold = iou_threshold
         self.merge_detections = merge_detections
+        self.perform_standard_pred = perform_standard_pred
         self.callback = callback
         self.thread_workers = thread_workers
         validate_inference_callback(callback=callback)
@@ -115,6 +119,9 @@ def __call__(self, image: np.ndarray) -> Detections:
             for future in as_completed(futures):
                 detections_list.append(future.result())
 
+        if self.perform_standard_pred:
+            detections_list.append(self.callback(image))
+
         if self.merge_detections:
             return Detections.merge(detections_list=detections_list).with_nmm(
                 threshold=self.iou_threshold

From 6f4704625b16ba69068b3a19f6d55bc21c80c434 Mon Sep 17 00:00:00 2001
From: mario-dg <mario.dagraca@gmx.com>
Date: Thu, 19 Oct 2023 00:05:42 +0200
Subject: [PATCH 04/26] Refactored merging of Detection attributes to better
 work with np.ndarrays

---
 supervision/detection/utils.py | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index b0414eb44..a79900b4b 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -162,8 +162,8 @@ def greedy_nmm(predictions: np.ndarray, threshold: float = 0.5) -> Dict[int, Lis
         xx2 = np.minimum(xx2, x2[idx])
         yy2 = np.minimum(yy2, y2[idx])
 
-        w = np.maximum(0.0, xx2 - xx1)
-        h = np.maximum(0.0, yy2 - yy1)
+        w = np.maximum(0, xx2 - xx1)
+        h = np.maximum(0, yy2 - yy1)
 
         inter = w * h
 
@@ -234,37 +234,39 @@ def get_merged_bbox(bbox1: np.ndarray, bbox2: np.ndarray) -> np.ndarray:
         np.ndarray: A numpy array of shape `(, 4)` where the new
             bounding box is the merged bounding box of `bbox1` and `bbox2`.
     """
-    left_top = np.minimum(bbox1[:2], bbox2[:2])
-    right_bottom = np.maximum(bbox1[2:], bbox2[2:])
-    return np.concatenate([left_top, right_bottom])
+    left_top = np.minimum(bbox1[0][:2], bbox2[0][:2])
+    right_bottom = np.maximum(bbox1[0][2:], bbox2[0][2:])
+    return np.array([np.concatenate([left_top, right_bottom])])
 
 
-def get_merged_class_id(id1: int, id2: int) -> int:
+def get_merged_class_id(id1: np.ndarray, id2: np.ndarray) -> np.ndarray:
     """
     Merges two class ids into one.
 
     Args:
-        id1 (int): The first class id.
-        id2 (int): The second class id.
+        id1 (np.ndarray): The first class id.
+        id2 (np.ndarray): The second class id.
 
     Returns:
-        int: The merged class id.
+        np.ndarray: The merged class id.
     """
-    return max(id1, id2)
+    return np.array([max(id1.item(), id2.item())])
 
 
-def get_merged_confidence(confidence1: float, confidence2: float) -> float:
+def get_merged_confidence(
+    confidence1: np.ndarray, confidence2: np.ndarray
+) -> np.ndarray:
     """
     Merges two confidences into one.
 
     Args:
-        confidence1 (float): The first confidence.
-        confidence2 (float): The second confidence.
+        confidence1 (np.ndarray): The first confidence.
+        confidence2 (np.ndarray): The second confidence.
 
     Returns:
-        float: The merged confidence.
+        np.ndarray: The merged confidence.
     """
-    return max(confidence1, confidence2)
+    return np.array([max(confidence1.item(), confidence2.item())])
 
 
 def get_merged_mask(mask1: np.ndarray, mask2: np.ndarray) -> np.ndarray:

From 166a8da9a07b20852c4559624fe029fc87bc8751 Mon Sep 17 00:00:00 2001
From: mario-dg <mario.dagraca@gmx.com>
Date: Thu, 11 Apr 2024 12:22:44 +0200
Subject: [PATCH 05/26] Implement Feedback

---
 supervision/detection/core.py                 | 154 +++++++++++-------
 .../detection/tools/inference_slicer.py       |  24 +--
 supervision/detection/utils.py                |  69 +-------
 3 files changed, 103 insertions(+), 144 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 66387087c..a9a4ee92d 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -8,22 +8,17 @@
 
 from supervision.config import CLASS_NAME_DATA_FIELD, ORIENTED_BOX_COORDINATES
 from supervision.detection.utils import (
-    batched_greedy_nmm,
+    batch_non_max_merge,
     box_iou_batch,
     box_non_max_suppression,
     calculate_masks_centroids,
     extract_ultralytics_masks,
     get_data_item,
-    get_merged_bbox,
-    get_merged_class_id,
-    get_merged_confidence,
-    get_merged_mask,
-    get_merged_tracker_id,
-    greedy_nmm,
     is_data_equal,
     mask_non_max_suppression,
     mask_to_xyxy,
     merge_data,
+    non_max_merge,
     process_roboflow_result,
     validate_detections_fields,
     xywh_to_xyxy,
@@ -32,17 +27,57 @@
 from supervision.utils.internal import deprecated
 
 
-def _merge_object_detection_pair(pred1: Detections, pred2: Detections) -> Detections:
-    merged_bbox = get_merged_bbox(pred1.xyxy, pred2.xyxy)
-    merged_conf = get_merged_confidence(pred1.confidence, pred2.confidence)
-    merged_class_id = get_merged_class_id(pred1.class_id, pred2.class_id)
+def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections:
+    """
+    Merges two Detections objects into a single Detections object.
+
+    A `winning` detection is determined based on the confidence score of the two
+    input detections. This winning detection is then used to specify which `class_id`,
+    `tracker_id`, and `data` to include in the merged Detections object.
+    The resulting `confidence` of the merged object is calculated by the weighted
+    contribution of each detection to the merged object.
+    The bounding boxes and masks of the two input detections are merged into a single
+    bounding box and mask, respectively.
+
+    Args:
+        det1 (Detections):
+            The first Detections object
+        det2 (Detections):
+            The second Detections object
+
+    Returns:
+        Detections: A new Detections object, with merged attributes.
+    """
+    assert (
+        len(det1) == len(det2) == 1
+    ), "Both Detections should have exactly 1 detected object."
+    winning_det = det1 if det1.confidence.item() > det2.confidence.item() else det2
+
+    area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * (
+        det1.xyxy[0][3] - det1.xyxy[0][1]
+    )
+    area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * (
+        det2.xyxy[0][3] - det2.xyxy[0][1]
+    )
+    merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2])
+    merged_x2, merged_y2 = np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:])
+    merged_area = (merged_x2 - merged_x1) * (merged_y2 - merged_y1)
+
+    merged_conf = (
+        area_det1 * det1.confidence.item() + area_det2 * det2.confidence.item()
+    ) / merged_area
+    merged_bbox = [np.concatenate([merged_x1, merged_y1, merged_x2, merged_y2])]
+    merged_class_id = winning_det.class_id.item()
     merged_tracker_id = None
     merged_mask = None
+    merged_data = None
 
-    if pred1.mask and pred2.mask:
-        merged_mask = get_merged_mask(pred1.mask, pred2.mask)
-    if pred1.tracker_id and pred2.tracker_id:
-        merged_tracker_id = get_merged_tracker_id(pred1.tracker_id, pred2.tracker_id)
+    if det1.mask and det2.mask:
+        merged_mask = np.logical_or(det1.mask, det2.mask)
+    if det1.tracker_id and det2.tracker_id:
+        merged_tracker_id = winning_det.tracker_id.item()
+    if det1.data and det2.data:
+        merged_data = winning_det.data
 
     return Detections(
         xyxy=merged_bbox,
@@ -50,6 +85,7 @@ def _merge_object_detection_pair(pred1: Detections, pred2: Detections) -> Detect
         confidence=merged_conf,
         class_id=merged_class_id,
         tracker_id=merged_tracker_id,
+        data=merged_data,
     )
 
 
@@ -1091,22 +1127,24 @@ def box_area(self) -> np.ndarray:
         """
         return (self.xyxy[:, 3] - self.xyxy[:, 1]) * (self.xyxy[:, 2] - self.xyxy[:, 0])
 
-    def with_nmm(
+    def with_nms(
         self, threshold: float = 0.5, class_agnostic: bool = False
     ) -> Detections:
         """
-        Perform non-maximum merging on the current set of object detections.
+        Performs non-max suppression on detection set. If the detections result
+        from a segmentation model, the IoU mask is applied. Otherwise, box IoU is used.
 
         Args:
             threshold (float, optional): The intersection-over-union threshold
-                to use for non-maximum merging. Defaults to 0.5.
+                to use for non-maximum suppression. The lower the value, the more
+                restrictive the NMS becomes. Defaults to 0.5.
             class_agnostic (bool, optional): Whether to perform class-agnostic
-                non-maximum merging. If True, the class_id of each detection
+                non-maximum suppression. If True, the class_id of each detection
                 will be ignored. Defaults to False.
 
         Returns:
             Detections: A new Detections object containing the subset of detections
-                after non-maximum merging.
+                after non-maximum suppression.
 
         Raises:
             AssertionError: If `confidence` is None and class_agnostic is False.
@@ -1115,16 +1153,17 @@ def with_nmm(
         if len(self) == 0:
             return self
 
-        assert 0.0 <= threshold <= 1.0, "Threshold must be between 0 and 1."
-
         assert (
             self.confidence is not None
-        ), "Detections confidence must be given for NMM to be executed."
+        ), "Detections confidence must be given for NMS to be executed."
 
         if class_agnostic:
             predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
-            keep_to_merge_list = greedy_nmm(predictions, threshold)
         else:
+            assert self.class_id is not None, (
+                "Detections class_id must be given for NMS to be executed. If you"
+                " intended to perform class agnostic NMS set class_agnostic=True."
+            )
             predictions = np.hstack(
                 (
                     self.xyxy,
@@ -1132,41 +1171,34 @@ def with_nmm(
                     self.class_id.reshape(-1, 1),
                 )
             )
-            keep_to_merge_list = batched_greedy_nmm(predictions, threshold)
-
-        result = []
 
-        for keep_ind, merge_ind_list in keep_to_merge_list.items():
-            for merge_ind in merge_ind_list:
-                if (
-                    box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy).item()
-                    > threshold
-                ):
-                    self[keep_ind] = _merge_object_detection_pair(
-                        self[keep_ind], self[merge_ind]
-                    )
-            result.append(self[keep_ind])
+        if self.mask is not None:
+            indices = mask_non_max_suppression(
+                predictions=predictions, masks=self.mask, iou_threshold=threshold
+            )
+        else:
+            indices = box_non_max_suppression(
+                predictions=predictions, iou_threshold=threshold
+            )
 
-        return Detections.merge(result)
+        return self[indices]
 
-    def with_nms(
+    def with_nmm(
         self, threshold: float = 0.5, class_agnostic: bool = False
     ) -> Detections:
         """
-        Performs non-max suppression on detection set. If the detections result
-        from a segmentation model, the IoU mask is applied. Otherwise, box IoU is used.
+        Perform non-maximum merging on the current set of object detections.
 
         Args:
             threshold (float, optional): The intersection-over-union threshold
-                to use for non-maximum suppression. I'm the lower the value the more
-                restrictive the NMS becomes. Defaults to 0.5.
+                to use for non-maximum merging. Defaults to 0.5.
             class_agnostic (bool, optional): Whether to perform class-agnostic
-                non-maximum suppression. If True, the class_id of each detection
+                non-maximum merging. If True, the class_id of each detection
                 will be ignored. Defaults to False.
 
         Returns:
             Detections: A new Detections object containing the subset of detections
-                after non-maximum suppression.
+                after non-maximum merging.
 
         Raises:
             AssertionError: If `confidence` is None and class_agnostic is False.
@@ -1175,17 +1207,16 @@ def with_nms(
         if len(self) == 0:
             return self
 
+        assert 0.0 <= threshold <= 1.0, "Threshold must be between 0 and 1."
+
         assert (
             self.confidence is not None
-        ), "Detections confidence must be given for NMS to be executed."
+        ), "Detections confidence must be given for NMM to be executed."
 
         if class_agnostic:
             predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
+            keep_to_merge_list = non_max_merge(predictions, threshold)
         else:
-            assert self.class_id is not None, (
-                "Detections class_id must be given for NMS to be executed. If you"
-                " intended to perform class agnostic NMS set class_agnostic=True."
-            )
             predictions = np.hstack(
                 (
                     self.xyxy,
@@ -1193,14 +1224,19 @@ def with_nms(
                     self.class_id.reshape(-1, 1),
                 )
             )
+            keep_to_merge_list = batch_non_max_merge(predictions, threshold)
 
-        if self.mask is not None:
-            indices = mask_non_max_suppression(
-                predictions=predictions, masks=self.mask, iou_threshold=threshold
-            )
-        else:
-            indices = box_non_max_suppression(
-                predictions=predictions, iou_threshold=threshold
-            )
+        result = []
 
-        return self[indices]
+        for keep_ind, merge_ind_list in keep_to_merge_list.items():
+            for merge_ind in merge_ind_list:
+                if (
+                    box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy).item()
+                    > threshold
+                ):
+                    self[keep_ind] = _merge_object_detection_pair(
+                        self[keep_ind], self[merge_ind]
+                    )
+            result.append(self[keep_ind])
+
+        return Detections.merge(result)
diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py
index 2aff9f6de..7157723f9 100644
--- a/supervision/detection/tools/inference_slicer.py
+++ b/supervision/detection/tools/inference_slicer.py
@@ -36,12 +36,6 @@ class InferenceSlicer:
             slices in the format `(width_ratio, height_ratio)`.
         iou_threshold (Optional[float]): Intersection over Union (IoU) threshold
             used for non-max suppression.
-        merge_detections (Optional[bool]): Whether to merge the detection from all
-            slices or simply concatenate them. If `True`, Non-Maximum Merging (NMM),
-            otherwise Non-Maximum Suppression (NMS), is applied to the detections.
-        perform_standard_pred (Optional[bool]): Whether to perform inference on the
-            whole image in addition to the slices to increase the accuracy of
-            large object detection.
         callback (Callable): A function that performs inference on a given image
             slice and returns detections.
         thread_workers (int): Number of threads for parallel execution.
@@ -59,15 +53,11 @@ def __init__(
         slice_wh: Tuple[int, int] = (320, 320),
         overlap_ratio_wh: Tuple[float, float] = (0.2, 0.2),
         iou_threshold: Optional[float] = 0.5,
-        merge_detections: Optional[bool] = False,
-        perform_standard_pred: Optional[bool] = False,
         thread_workers: int = 1,
     ):
         self.slice_wh = slice_wh
         self.overlap_ratio_wh = overlap_ratio_wh
         self.iou_threshold = iou_threshold
-        self.merge_detections = merge_detections
-        self.perform_standard_pred = perform_standard_pred
         self.callback = callback
         self.thread_workers = thread_workers
 
@@ -118,17 +108,9 @@ def callback(image_slice: np.ndarray) -> sv.Detections:
             for future in as_completed(futures):
                 detections_list.append(future.result())
 
-        if self.perform_standard_pred:
-            detections_list.append(self.callback(image))
-
-        if self.merge_detections:
-            return Detections.merge(detections_list=detections_list).with_nmm(
-                threshold=self.iou_threshold
-            )
-        else:
-            return Detections.merge(detections_list=detections_list).with_nms(
-                threshold=self.iou_threshold
-            )
+        return Detections.merge(detections_list=detections_list).with_nms(
+            threshold=self.iou_threshold
+        )
 
     def _run_callback(self, image, offset) -> Detections:
         """
diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index b9edb9d63..9e732aeb4 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -274,7 +274,9 @@ def box_non_max_suppression(
     return keep[sort_index.argsort()]
 
 
-def greedy_nmm(predictions: np.ndarray, threshold: float = 0.5) -> Dict[int, List[int]]:
+def non_max_merge(
+    predictions: np.ndarray, threshold: float = 0.5
+) -> Dict[int, List[int]]:
     """
     Apply greedy version of non-maximum merging to avoid detecting too many
     overlapping bounding boxes for a given object.
@@ -351,7 +353,7 @@ def greedy_nmm(predictions: np.ndarray, threshold: float = 0.5) -> Dict[int, Lis
     return keep_to_merge_list
 
 
-def batched_greedy_nmm(
+def batch_non_max_merge(
     predictions: np.ndarray, threshold: float = 0.5
 ) -> Dict[int, List[int]]:
     """
@@ -373,7 +375,7 @@ def batched_greedy_nmm(
     keep_to_merge_list = {}
     for category_id in np.unique(category_ids):
         curr_indices = np.where(category_ids == category_id)[0]
-        curr_keep_to_merge_list = greedy_nmm(predictions[curr_indices], threshold)
+        curr_keep_to_merge_list = non_max_merge(predictions[curr_indices], threshold)
         curr_indices_list = curr_indices.tolist()
         for curr_keep, curr_merge_list in curr_keep_to_merge_list.items():
             keep = curr_indices_list[curr_keep]
@@ -403,67 +405,6 @@ def get_merged_bbox(bbox1: np.ndarray, bbox2: np.ndarray) -> np.ndarray:
     return np.array([np.concatenate([left_top, right_bottom])])
 
 
-def get_merged_class_id(id1: np.ndarray, id2: np.ndarray) -> np.ndarray:
-    """
-    Merges two class ids into one.
-
-    Args:
-        id1 (np.ndarray): The first class id.
-        id2 (np.ndarray): The second class id.
-
-    Returns:
-        np.ndarray: The merged class id.
-    """
-    return np.array([max(id1.item(), id2.item())])
-
-
-def get_merged_confidence(
-    confidence1: np.ndarray, confidence2: np.ndarray
-) -> np.ndarray:
-    """
-    Merges two confidences into one.
-
-    Args:
-        confidence1 (np.ndarray): The first confidence.
-        confidence2 (np.ndarray): The second confidence.
-
-    Returns:
-        np.ndarray: The merged confidence.
-    """
-    return np.array([max(confidence1.item(), confidence2.item())])
-
-
-def get_merged_mask(mask1: np.ndarray, mask2: np.ndarray) -> np.ndarray:
-    """
-    Merges two masks into one.
-
-    Args:
-        mask1 (np.ndarray): A numpy array of shape `(H, W)` where `H` and `W`
-            are the height and width of the mask, respectively.
-        mask2 (np.ndarray): A numpy array of shape `(H, W)` where `H` and `W`
-            are the height and width of the mask, respectively.
-
-    Returns:
-        np.ndarray: A numpy array of shape `(H, W)` where the new mask is the
-            merged mask of `mask1` and `mask2`.
-    """
-    return np.logical_or(mask1, mask2)
-
-
-def get_merged_tracker_id(tracker_id1: int, tracker_id2: int) -> int:
-    """
-    Merges two tracker ids into one.
-
-    Args:
-        tracker_id1 (int): The first tracker id.
-        tracker_id2 (int): The second tracker id.
-
-    Returns:
-        int: The merged tracker id.
-    """
-    return max(tracker_id1, tracker_id2)
-
-
 def clip_boxes(xyxy: np.ndarray, resolution_wh: Tuple[int, int]) -> np.ndarray:
     """
     Clips bounding boxes coordinates to fit within the frame resolution.

From d7e52bee264fb1b3b5c47a3f27b5eb67deae86a6 Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Mon, 6 May 2024 17:20:31 +0300
Subject: [PATCH 06/26] NMM: Add None-checks, fix area normalization, style

---
 supervision/detection/core.py | 181 +++++++++++++++++++++++++---------
 1 file changed, 132 insertions(+), 49 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index b60e33632..3d1c135a3 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -30,14 +30,16 @@
 def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections:
     """
     Merges two Detections object into a single Detections object.
+    Assumes each Detections contains exactly one object.
 
     A `winning` detection is determined based on the confidence score of the two
-    input detections. This winning detection is then used to specify which `class_id`,
-    `tracker_id`, and `data` to include in the merged Detections object.
+    input detections. This winning detection is then used to specify which
+    `class_id`, `tracker_id`, and `data` to include in the merged Detections object.
+
     The resulting `confidence` of the merged object is calculated by the weighted
     contribution of each detection to the merged object.
-    The bounding boxes and masks of the two input detections are merged into a single
-    bounding box and mask, respectively.
+    The bounding boxes and masks of the two input detections are merged into a
+    single bounding box and mask, respectively.
 
     Args:
         det1 (Detections):
@@ -47,11 +49,39 @@ def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detectio
 
     Returns:
         Detections: A new Detections object, with merged attributes.
+
+    Raises:
+        ValueError: If the input Detections objects do not have exactly 1 detected
+            object.
+
+    Example:
+        ```python
+        import cv2
+        import supervision as sv
+        from inference import get_model
+
+        image = cv2.imread(<SOURCE_IMAGE_PATH>)
+        model = get_model(model_id="yolov8s-640")
+
+        result = model.infer(image)[0]
+        detections = sv.Detections.from_inference(result)
+
+        merged_detections = merge_object_detection_pair(
+            detections[0], detections[1])
+        ```
     """
-    assert (
-        len(det1) == len(det2) == 1
-    ), "Both Detections should have exactly 1 detected object."
-    winning_det = det1 if det1.confidence.item() > det2.confidence.item() else det2
+    if len(det1) != 1 or len(det2) != 1:
+        raise ValueError(
+            "Both Detections should have exactly 1 detected object.")
+
+    if det2.confidence is None:
+        winning_det = det1
+    elif det1.confidence is None:
+        winning_det = det2
+    elif det1.confidence[0] >= det2.confidence[0]:
+        winning_det = det1
+    else:
+        winning_det = det2
 
     area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * (
         det1.xyxy[0][3] - det1.xyxy[0][1]
@@ -59,33 +89,39 @@ def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detectio
     area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * (
         det2.xyxy[0][3] - det2.xyxy[0][1]
     )
+
     merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2])
     merged_x2, merged_y2 = np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:])
-    merged_area = (merged_x2 - merged_x1) * (merged_y2 - merged_y1)
-
-    merged_conf = (
-        area_det1 * det1.confidence.item() + area_det2 * det2.confidence.item()
-    ) / merged_area
-    merged_bbox = [np.concatenate([merged_x1, merged_y1, merged_x2, merged_y2])]
-    merged_class_id = winning_det.class_id.item()
-    merged_tracker_id = None
-    merged_mask = None
-    merged_data = None
 
-    if det1.mask and det2.mask:
+    merged_xy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]])
+
+    winning_class_id = winning_det.class_id
+
+    if det1.confidence is None or det2.confidence is None:
+        merged_confidence = None
+    else:
+        merged_confidence = (
+            area_det1 * det1.confidence[0] + area_det2 * det2.confidence[0]
+        ) / (area_det1 + area_det2)
+        merged_confidence = np.array([merged_confidence])
+
+    merged_mask = None
+    if det1.mask is not None and det2.mask is not None:
         merged_mask = np.logical_or(det1.mask, det2.mask)
-    if det1.tracker_id and det2.tracker_id:
-        merged_tracker_id = winning_det.tracker_id.item()
+
+    winning_tracker_id = winning_det.tracker_id
+
+    winning_data = None
     if det1.data and det2.data:
-        merged_data = winning_det.data
+        winning_data = winning_det.data
 
     return Detections(
-        xyxy=merged_bbox,
+        xyxy=merged_xy,
         mask=merged_mask,
-        confidence=merged_conf,
-        class_id=merged_class_id,
-        tracker_id=merged_tracker_id,
-        data=merged_data,
+        confidence=merged_confidence,
+        class_id=winning_class_id,
+        tracker_id=winning_tracker_id,
+        data=winning_data,
     )
 
 
@@ -260,7 +296,8 @@ def from_yolov5(cls, yolov5_results) -> Detections:
             detections = sv.Detections.from_yolov5(result)
             ```
         """
-        yolov5_detections_predictions = yolov5_results.pred[0].cpu().cpu().numpy()
+        yolov5_detections_predictions = yolov5_results.pred[0].cpu(
+        ).cpu().numpy()
 
         return cls(
             xyxy=yolov5_detections_predictions[:, :4],
@@ -307,7 +344,8 @@ def from_ultralytics(cls, ultralytics_results) -> Detections:
 
         if "obb" in ultralytics_results and ultralytics_results.obb is not None:
             class_id = ultralytics_results.obb.cls.cpu().numpy().astype(int)
-            class_names = np.array([ultralytics_results.names[i] for i in class_id])
+            class_names = np.array(
+                [ultralytics_results.names[i] for i in class_id])
             oriented_box_coordinates = ultralytics_results.obb.xyxyxyxy.cpu().numpy()
             return cls(
                 xyxy=ultralytics_results.obb.xyxy.cpu().numpy(),
@@ -323,7 +361,8 @@ def from_ultralytics(cls, ultralytics_results) -> Detections:
             )
 
         class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)
-        class_names = np.array([ultralytics_results.names[i] for i in class_id])
+        class_names = np.array([ultralytics_results.names[i]
+                               for i in class_id])
         return cls(
             xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(),
             confidence=ultralytics_results.boxes.conf.cpu().numpy(),
@@ -411,7 +450,8 @@ def from_tensorflow(
         return cls(
             xyxy=boxes,
             confidence=tensorflow_results["detection_scores"][0].numpy(),
-            class_id=tensorflow_results["detection_classes"][0].numpy().astype(int),
+            class_id=tensorflow_results["detection_classes"][0].numpy().astype(
+                int),
         )
 
     @classmethod
@@ -448,7 +488,8 @@ def from_deepsparse(cls, deepsparse_results) -> Detections:
         return cls(
             xyxy=np.array(deepsparse_results.boxes[0]),
             confidence=np.array(deepsparse_results.scores[0]),
-            class_id=np.array(deepsparse_results.labels[0]).astype(float).astype(int),
+            class_id=np.array(deepsparse_results.labels[0]).astype(
+                float).astype(int),
         )
 
     @classmethod
@@ -535,24 +576,29 @@ class names. If provided, the resulting Detections object will contain
             Class names values can be accessed using `detections["class_name"]`.
         """  # noqa: E501 // docs
 
-        class_ids = transformers_results["labels"].cpu().detach().numpy().astype(int)
+        class_ids = transformers_results["labels"].cpu(
+        ).detach().numpy().astype(int)
         data = {}
         if id2label is not None:
-            class_names = np.array([id2label[class_id] for class_id in class_ids])
+            class_names = np.array([id2label[class_id]
+                                   for class_id in class_ids])
             data[CLASS_NAME_DATA_FIELD] = class_names
         if "boxes" in transformers_results:
             return cls(
                 xyxy=transformers_results["boxes"].cpu().detach().numpy(),
-                confidence=transformers_results["scores"].cpu().detach().numpy(),
+                confidence=transformers_results["scores"].cpu(
+                ).detach().numpy(),
                 class_id=class_ids,
                 data=data,
             )
         elif "masks" in transformers_results:
-            masks = transformers_results["masks"].cpu().detach().numpy().astype(bool)
+            masks = transformers_results["masks"].cpu(
+            ).detach().numpy().astype(bool)
             return cls(
                 xyxy=mask_to_xyxy(masks),
                 mask=masks,
-                confidence=transformers_results["scores"].cpu().detach().numpy(),
+                confidence=transformers_results["scores"].cpu(
+                ).detach().numpy(),
                 class_id=class_ids,
                 data=data,
             )
@@ -595,7 +641,8 @@ class IDs, and confidences of the predictions.
         """
 
         return cls(
-            xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu().numpy(),
+            xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu(
+            ).numpy(),
             confidence=detectron2_results["instances"].scores.cpu().numpy(),
             class_id=detectron2_results["instances"]
             .pred_classes.cpu()
@@ -638,7 +685,8 @@ def from_inference(cls, roboflow_result: Union[dict, Any]) -> Detections:
             Class names values can be accessed using `detections["class_name"]`.
         """
         with suppress(AttributeError):
-            roboflow_result = roboflow_result.dict(exclude_none=True, by_alias=True)
+            roboflow_result = roboflow_result.dict(
+                exclude_none=True, by_alias=True)
         xyxy, confidence, class_id, masks, trackers, data = process_roboflow_result(
             roboflow_result=roboflow_result
         )
@@ -730,7 +778,8 @@ def from_sam(cls, sam_result: List[dict]) -> Detections:
         )
 
         xywh = np.array([mask["bbox"] for mask in sorted_generated_masks])
-        mask = np.array([mask["segmentation"] for mask in sorted_generated_masks])
+        mask = np.array([mask["segmentation"]
+                        for mask in sorted_generated_masks])
 
         if np.asarray(xywh).shape[0] == 0:
             return cls.empty()
@@ -957,7 +1006,8 @@ def stack_or_none(name: str):
             if all(d.__getattribute__(name) is None for d in detections_list):
                 return None
             if any(d.__getattribute__(name) is None for d in detections_list):
-                raise ValueError(f"All or none of the '{name}' fields must be None")
+                raise ValueError(
+                    f"All or none of the '{name}' fields must be None")
             return (
                 np.vstack([d.__getattribute__(name) for d in detections_list])
                 if name == "mask"
@@ -1128,6 +1178,34 @@ def __setitem__(self, key: str, value: Union[np.ndarray, List]):
 
         self.data[key] = value
 
+    def _set_at_index(self, index: int, other: Detections):
+        """
+        Set detection values (xyxy, confidence, ...) at a specified index
+        to those of another Detections object, at index 0.
+
+        Args:
+            index (int): The index in current detection, where values
+                will be set.
+            other (Detections): Detections object with exactly one element
+                to set the values from.
+
+        Raises:
+            ValueError: If `other` is not made of exactly one element.
+        """
+        if len(other) != 1:
+            raise ValueError(
+                "Detection to set from must have exactly one element.")
+
+        self.xyxy[index] = other.xyxy[0]
+        if self.mask is not None and other.mask is not None:
+            self.mask[index] = other.mask[0]
+        if self.confidence is not None and other.confidence is not None:
+            self.confidence[index] = other.confidence[0]
+        if self.class_id is not None and other.class_id is not None:
+            self.class_id[index] = other.class_id[0]
+        if self.tracker_id is not None and other.tracker_id is not None:
+            self.tracker_id[index] = other.tracker_id[0]
+
     @property
     def area(self) -> np.ndarray:
         """
@@ -1188,7 +1266,8 @@ def with_nms(
         ), "Detections confidence must be given for NMS to be executed."
 
         if class_agnostic:
-            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
+            predictions = np.hstack(
+                (self.xyxy, self.confidence.reshape(-1, 1)))
         else:
             assert self.class_id is not None, (
                 "Detections class_id must be given for NMS to be executed. If you"
@@ -1244,9 +1323,14 @@ def with_nmm(
         ), "Detections confidence must be given for NMM to be executed."
 
         if class_agnostic:
-            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
+            predictions = np.hstack(
+                (self.xyxy, self.confidence.reshape(-1, 1)))
             keep_to_merge_list = non_max_merge(predictions, threshold)
         else:
+            assert self.class_id is not None, (
+                "Detections class_id must be given for NMS to be executed. If you"
+                " intended to perform class agnostic NMM set class_agnostic=True."
+            )
             predictions = np.hstack(
                 (
                     self.xyxy,
@@ -1257,16 +1341,15 @@ def with_nmm(
             keep_to_merge_list = batch_non_max_merge(predictions, threshold)
 
         result = []
-
         for keep_ind, merge_ind_list in keep_to_merge_list.items():
             for merge_ind in merge_ind_list:
-                if (
-                    box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy).item()
-                    > threshold
-                ):
-                    self[keep_ind] = _merge_object_detection_pair(
+                box_iou = box_iou_batch(
+                    self[keep_ind].xyxy, self[merge_ind].xyxy)[0]
+                if box_iou > threshold:
+                    merged_detection = _merge_object_detection_pair(
                         self[keep_ind], self[merge_ind]
                     )
+                    self._set_at_index(keep_ind, merged_detection)
             result.append(self[keep_ind])
 
         return Detections.merge(result)

From bee3252110887fe941028ef696ebe0f36eae3b7e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 6 May 2024 14:22:31 +0000
Subject: [PATCH 07/26] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?=
 =?UTF-8?q?=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/detection/core.py | 57 ++++++++++++-----------------------
 1 file changed, 19 insertions(+), 38 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 3d1c135a3..fa34c158d 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -71,8 +71,7 @@ def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detectio
         ```
     """
     if len(det1) != 1 or len(det2) != 1:
-        raise ValueError(
-            "Both Detections should have exactly 1 detected object.")
+        raise ValueError("Both Detections should have exactly 1 detected object.")
 
     if det2.confidence is None:
         winning_det = det1
@@ -296,8 +295,7 @@ def from_yolov5(cls, yolov5_results) -> Detections:
             detections = sv.Detections.from_yolov5(result)
             ```
         """
-        yolov5_detections_predictions = yolov5_results.pred[0].cpu(
-        ).cpu().numpy()
+        yolov5_detections_predictions = yolov5_results.pred[0].cpu().cpu().numpy()
 
         return cls(
             xyxy=yolov5_detections_predictions[:, :4],
@@ -344,8 +342,7 @@ def from_ultralytics(cls, ultralytics_results) -> Detections:
 
         if "obb" in ultralytics_results and ultralytics_results.obb is not None:
             class_id = ultralytics_results.obb.cls.cpu().numpy().astype(int)
-            class_names = np.array(
-                [ultralytics_results.names[i] for i in class_id])
+            class_names = np.array([ultralytics_results.names[i] for i in class_id])
             oriented_box_coordinates = ultralytics_results.obb.xyxyxyxy.cpu().numpy()
             return cls(
                 xyxy=ultralytics_results.obb.xyxy.cpu().numpy(),
@@ -361,8 +358,7 @@ def from_ultralytics(cls, ultralytics_results) -> Detections:
             )
 
         class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)
-        class_names = np.array([ultralytics_results.names[i]
-                               for i in class_id])
+        class_names = np.array([ultralytics_results.names[i] for i in class_id])
         return cls(
             xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(),
             confidence=ultralytics_results.boxes.conf.cpu().numpy(),
@@ -450,8 +446,7 @@ def from_tensorflow(
         return cls(
             xyxy=boxes,
             confidence=tensorflow_results["detection_scores"][0].numpy(),
-            class_id=tensorflow_results["detection_classes"][0].numpy().astype(
-                int),
+            class_id=tensorflow_results["detection_classes"][0].numpy().astype(int),
         )
 
     @classmethod
@@ -488,8 +483,7 @@ def from_deepsparse(cls, deepsparse_results) -> Detections:
         return cls(
             xyxy=np.array(deepsparse_results.boxes[0]),
             confidence=np.array(deepsparse_results.scores[0]),
-            class_id=np.array(deepsparse_results.labels[0]).astype(
-                float).astype(int),
+            class_id=np.array(deepsparse_results.labels[0]).astype(float).astype(int),
         )
 
     @classmethod
@@ -576,29 +570,24 @@ class names. If provided, the resulting Detections object will contain
             Class names values can be accessed using `detections["class_name"]`.
         """  # noqa: E501 // docs
 
-        class_ids = transformers_results["labels"].cpu(
-        ).detach().numpy().astype(int)
+        class_ids = transformers_results["labels"].cpu().detach().numpy().astype(int)
         data = {}
         if id2label is not None:
-            class_names = np.array([id2label[class_id]
-                                   for class_id in class_ids])
+            class_names = np.array([id2label[class_id] for class_id in class_ids])
             data[CLASS_NAME_DATA_FIELD] = class_names
         if "boxes" in transformers_results:
             return cls(
                 xyxy=transformers_results["boxes"].cpu().detach().numpy(),
-                confidence=transformers_results["scores"].cpu(
-                ).detach().numpy(),
+                confidence=transformers_results["scores"].cpu().detach().numpy(),
                 class_id=class_ids,
                 data=data,
             )
         elif "masks" in transformers_results:
-            masks = transformers_results["masks"].cpu(
-            ).detach().numpy().astype(bool)
+            masks = transformers_results["masks"].cpu().detach().numpy().astype(bool)
             return cls(
                 xyxy=mask_to_xyxy(masks),
                 mask=masks,
-                confidence=transformers_results["scores"].cpu(
-                ).detach().numpy(),
+                confidence=transformers_results["scores"].cpu().detach().numpy(),
                 class_id=class_ids,
                 data=data,
             )
@@ -641,8 +630,7 @@ class IDs, and confidences of the predictions.
         """
 
         return cls(
-            xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu(
-            ).numpy(),
+            xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu().numpy(),
             confidence=detectron2_results["instances"].scores.cpu().numpy(),
             class_id=detectron2_results["instances"]
             .pred_classes.cpu()
@@ -685,8 +673,7 @@ def from_inference(cls, roboflow_result: Union[dict, Any]) -> Detections:
             Class names values can be accessed using `detections["class_name"]`.
         """
         with suppress(AttributeError):
-            roboflow_result = roboflow_result.dict(
-                exclude_none=True, by_alias=True)
+            roboflow_result = roboflow_result.dict(exclude_none=True, by_alias=True)
         xyxy, confidence, class_id, masks, trackers, data = process_roboflow_result(
             roboflow_result=roboflow_result
         )
@@ -778,8 +765,7 @@ def from_sam(cls, sam_result: List[dict]) -> Detections:
         )
 
         xywh = np.array([mask["bbox"] for mask in sorted_generated_masks])
-        mask = np.array([mask["segmentation"]
-                        for mask in sorted_generated_masks])
+        mask = np.array([mask["segmentation"] for mask in sorted_generated_masks])
 
         if np.asarray(xywh).shape[0] == 0:
             return cls.empty()
@@ -1006,8 +992,7 @@ def stack_or_none(name: str):
             if all(d.__getattribute__(name) is None for d in detections_list):
                 return None
             if any(d.__getattribute__(name) is None for d in detections_list):
-                raise ValueError(
-                    f"All or none of the '{name}' fields must be None")
+                raise ValueError(f"All or none of the '{name}' fields must be None")
             return (
                 np.vstack([d.__getattribute__(name) for d in detections_list])
                 if name == "mask"
@@ -1193,8 +1178,7 @@ def _set_at_index(self, index: int, other: Detections):
             ValueError: If `other` is not made of exactly one element.
         """
         if len(other) != 1:
-            raise ValueError(
-                "Detection to set from must have exactly one element.")
+            raise ValueError("Detection to set from must have exactly one element.")
 
         self.xyxy[index] = other.xyxy[0]
         if self.mask is not None and other.mask is not None:
@@ -1266,8 +1250,7 @@ def with_nms(
         ), "Detections confidence must be given for NMS to be executed."
 
         if class_agnostic:
-            predictions = np.hstack(
-                (self.xyxy, self.confidence.reshape(-1, 1)))
+            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
         else:
             assert self.class_id is not None, (
                 "Detections class_id must be given for NMS to be executed. If you"
@@ -1323,8 +1306,7 @@ def with_nmm(
         ), "Detections confidence must be given for NMM to be executed."
 
         if class_agnostic:
-            predictions = np.hstack(
-                (self.xyxy, self.confidence.reshape(-1, 1)))
+            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
             keep_to_merge_list = non_max_merge(predictions, threshold)
         else:
             assert self.class_id is not None, (
@@ -1343,8 +1325,7 @@ def with_nmm(
         result = []
         for keep_ind, merge_ind_list in keep_to_merge_list.items():
             for merge_ind in merge_ind_list:
-                box_iou = box_iou_batch(
-                    self[keep_ind].xyxy, self[merge_ind].xyxy)[0]
+                box_iou = box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy)[0]
                 if box_iou > threshold:
                     merged_detection = _merge_object_detection_pair(
                         self[keep_ind], self[merge_ind]

From 97c407101a2755db3288613c97cbbcda4e8105c0 Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Mon, 6 May 2024 17:24:41 +0300
Subject: [PATCH 08/26] NMM: Move detections merge into Detections class.

* No other changes!
---
 supervision/detection/core.py | 251 ++++++++++++++++++----------------
 1 file changed, 135 insertions(+), 116 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index fa34c158d..501a27e9d 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -27,103 +27,6 @@
 from supervision.validators import validate_detections_fields
 
 
-def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections:
-    """
-    Merges two Detections object into a single Detections object.
-    Assumes each Detections contains exactly one object.
-
-    A `winning` detection is determined based on the confidence score of the two
-    input detections. This winning detection is then used to specify which
-    `class_id`, `tracker_id`, and `data` to include in the merged Detections object.
-
-    The resulting `confidence` of the merged object is calculated by the weighted
-    contribution of each detection to the merged object.
-    The bounding boxes and masks of the two input detections are merged into a
-    single bounding box and mask, respectively.
-
-    Args:
-        det1 (Detections):
-            The first Detections object
-        det2 (Detections):
-            The second Detections object
-
-    Returns:
-        Detections: A new Detections object, with merged attributes.
-
-    Raises:
-        ValueError: If the input Detections objects do not have exactly 1 detected
-            object.
-
-    Example:
-        ```python
-        import cv2
-        import supervision as sv
-        from inference import get_model
-
-        image = cv2.imread(<SOURCE_IMAGE_PATH>)
-        model = get_model(model_id="yolov8s-640")
-
-        result = model.infer(image)[0]
-        detections = sv.Detections.from_inference(result)
-
-        merged_detections = merge_object_detection_pair(
-            detections[0], detections[1])
-        ```
-    """
-    if len(det1) != 1 or len(det2) != 1:
-        raise ValueError("Both Detections should have exactly 1 detected object.")
-
-    if det2.confidence is None:
-        winning_det = det1
-    elif det1.confidence is None:
-        winning_det = det2
-    elif det1.confidence[0] >= det2.confidence[0]:
-        winning_det = det1
-    else:
-        winning_det = det2
-
-    area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * (
-        det1.xyxy[0][3] - det1.xyxy[0][1]
-    )
-    area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * (
-        det2.xyxy[0][3] - det2.xyxy[0][1]
-    )
-
-    merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2])
-    merged_x2, merged_y2 = np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:])
-
-    merged_xy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]])
-
-    winning_class_id = winning_det.class_id
-
-    if det1.confidence is None or det2.confidence is None:
-        merged_confidence = None
-    else:
-        merged_confidence = (
-            area_det1 * det1.confidence[0] + area_det2 * det2.confidence[0]
-        ) / (area_det1 + area_det2)
-        merged_confidence = np.array([merged_confidence])
-
-    merged_mask = None
-    if det1.mask is not None and det2.mask is not None:
-        merged_mask = np.logical_or(det1.mask, det2.mask)
-
-    winning_tracker_id = winning_det.tracker_id
-
-    winning_data = None
-    if det1.data and det2.data:
-        winning_data = winning_det.data
-
-    return Detections(
-        xyxy=merged_xy,
-        mask=merged_mask,
-        confidence=merged_confidence,
-        class_id=winning_class_id,
-        tracker_id=winning_tracker_id,
-        data=winning_data,
-    )
-
-
 @dataclass
 class Detections:
     """
@@ -295,7 +198,8 @@ def from_yolov5(cls, yolov5_results) -> Detections:
             detections = sv.Detections.from_yolov5(result)
             ```
         """
-        yolov5_detections_predictions = yolov5_results.pred[0].cpu().cpu().numpy()
+        yolov5_detections_predictions = yolov5_results.pred[0].cpu(
+        ).cpu().numpy()
 
         return cls(
             xyxy=yolov5_detections_predictions[:, :4],
@@ -342,7 +246,8 @@ def from_ultralytics(cls, ultralytics_results) -> Detections:
 
         if "obb" in ultralytics_results and ultralytics_results.obb is not None:
             class_id = ultralytics_results.obb.cls.cpu().numpy().astype(int)
-            class_names = np.array([ultralytics_results.names[i] for i in class_id])
+            class_names = np.array(
+                [ultralytics_results.names[i] for i in class_id])
             oriented_box_coordinates = ultralytics_results.obb.xyxyxyxy.cpu().numpy()
             return cls(
                 xyxy=ultralytics_results.obb.xyxy.cpu().numpy(),
@@ -358,7 +263,8 @@ def from_ultralytics(cls, ultralytics_results) -> Detections:
             )
 
         class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)
-        class_names = np.array([ultralytics_results.names[i] for i in class_id])
+        class_names = np.array([ultralytics_results.names[i]
+                               for i in class_id])
         return cls(
             xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(),
             confidence=ultralytics_results.boxes.conf.cpu().numpy(),
@@ -446,7 +352,8 @@ def from_tensorflow(
         return cls(
             xyxy=boxes,
             confidence=tensorflow_results["detection_scores"][0].numpy(),
-            class_id=tensorflow_results["detection_classes"][0].numpy().astype(int),
+            class_id=tensorflow_results["detection_classes"][0].numpy().astype(
+                int),
         )
 
     @classmethod
@@ -483,7 +390,8 @@ def from_deepsparse(cls, deepsparse_results) -> Detections:
         return cls(
             xyxy=np.array(deepsparse_results.boxes[0]),
             confidence=np.array(deepsparse_results.scores[0]),
-            class_id=np.array(deepsparse_results.labels[0]).astype(float).astype(int),
+            class_id=np.array(deepsparse_results.labels[0]).astype(
+                float).astype(int),
         )
 
     @classmethod
@@ -570,24 +478,29 @@ class names. If provided, the resulting Detections object will contain
             Class names values can be accessed using `detections["class_name"]`.
         """  # noqa: E501 // docs
 
-        class_ids = transformers_results["labels"].cpu().detach().numpy().astype(int)
+        class_ids = transformers_results["labels"].cpu(
+        ).detach().numpy().astype(int)
         data = {}
         if id2label is not None:
-            class_names = np.array([id2label[class_id] for class_id in class_ids])
+            class_names = np.array([id2label[class_id]
+                                   for class_id in class_ids])
             data[CLASS_NAME_DATA_FIELD] = class_names
         if "boxes" in transformers_results:
             return cls(
                 xyxy=transformers_results["boxes"].cpu().detach().numpy(),
-                confidence=transformers_results["scores"].cpu().detach().numpy(),
+                confidence=transformers_results["scores"].cpu(
+                ).detach().numpy(),
                 class_id=class_ids,
                 data=data,
             )
         elif "masks" in transformers_results:
-            masks = transformers_results["masks"].cpu().detach().numpy().astype(bool)
+            masks = transformers_results["masks"].cpu(
+            ).detach().numpy().astype(bool)
             return cls(
                 xyxy=mask_to_xyxy(masks),
                 mask=masks,
-                confidence=transformers_results["scores"].cpu().detach().numpy(),
+                confidence=transformers_results["scores"].cpu(
+                ).detach().numpy(),
                 class_id=class_ids,
                 data=data,
             )
@@ -630,7 +543,8 @@ class IDs, and confidences of the predictions.
         """
 
         return cls(
-            xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu().numpy(),
+            xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu(
+            ).numpy(),
             confidence=detectron2_results["instances"].scores.cpu().numpy(),
             class_id=detectron2_results["instances"]
             .pred_classes.cpu()
@@ -673,7 +587,8 @@ def from_inference(cls, roboflow_result: Union[dict, Any]) -> Detections:
             Class names values can be accessed using `detections["class_name"]`.
         """
         with suppress(AttributeError):
-            roboflow_result = roboflow_result.dict(exclude_none=True, by_alias=True)
+            roboflow_result = roboflow_result.dict(
+                exclude_none=True, by_alias=True)
         xyxy, confidence, class_id, masks, trackers, data = process_roboflow_result(
             roboflow_result=roboflow_result
         )
@@ -765,7 +680,8 @@ def from_sam(cls, sam_result: List[dict]) -> Detections:
         )
 
         xywh = np.array([mask["bbox"] for mask in sorted_generated_masks])
-        mask = np.array([mask["segmentation"] for mask in sorted_generated_masks])
+        mask = np.array([mask["segmentation"]
+                        for mask in sorted_generated_masks])
 
         if np.asarray(xywh).shape[0] == 0:
             return cls.empty()
@@ -992,7 +908,8 @@ def stack_or_none(name: str):
             if all(d.__getattribute__(name) is None for d in detections_list):
                 return None
             if any(d.__getattribute__(name) is None for d in detections_list):
-                raise ValueError(f"All or none of the '{name}' fields must be None")
+                raise ValueError(
+                    f"All or none of the '{name}' fields must be None")
             return (
                 np.vstack([d.__getattribute__(name) for d in detections_list])
                 if name == "mask"
@@ -1178,7 +1095,8 @@ def _set_at_index(self, index: int, other: Detections):
             ValueError: If `other` is not made of exactly one element.
         """
         if len(other) != 1:
-            raise ValueError("Detection to set from must have exactly one element.")
+            raise ValueError(
+                "Detection to set from must have exactly one element.")
 
         self.xyxy[index] = other.xyxy[0]
         if self.mask is not None and other.mask is not None:
@@ -1250,7 +1168,8 @@ def with_nms(
         ), "Detections confidence must be given for NMS to be executed."
 
         if class_agnostic:
-            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
+            predictions = np.hstack(
+                (self.xyxy, self.confidence.reshape(-1, 1)))
         else:
             assert self.class_id is not None, (
                 "Detections class_id must be given for NMS to be executed. If you"
@@ -1306,7 +1225,8 @@ def with_nmm(
         ), "Detections confidence must be given for NMM to be executed."
 
         if class_agnostic:
-            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
+            predictions = np.hstack(
+                (self.xyxy, self.confidence.reshape(-1, 1)))
             keep_to_merge_list = non_max_merge(predictions, threshold)
         else:
             assert self.class_id is not None, (
@@ -1325,12 +1245,111 @@ def with_nmm(
         result = []
         for keep_ind, merge_ind_list in keep_to_merge_list.items():
             for merge_ind in merge_ind_list:
-                box_iou = box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy)[0]
+                box_iou = box_iou_batch(
+                    self[keep_ind].xyxy, self[merge_ind].xyxy)[0]
                 if box_iou > threshold:
-                    merged_detection = _merge_object_detection_pair(
+                    merged_detection = self._merge_object_detection_pair(
                         self[keep_ind], self[merge_ind]
                     )
                     self._set_at_index(keep_ind, merged_detection)
             result.append(self[keep_ind])
 
         return Detections.merge(result)
+
+    @staticmethod
+    def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections:
+        """
+        Merges two Detections object into a single Detections object.
+        Assumes each Detections contains exactly one object.
+
+        A `winning` detection is determined based on the confidence score of the two
+        input detections. This winning detection is then used to specify which
+        `class_id`, `tracker_id`, and `data` to include in the merged Detections object.
+
+        The resulting `confidence` of the merged object is calculated by the weighted
+        contribution of each detection to the merged object.
+        The bounding boxes and masks of the two input detections are merged into a
+        single bounding box and mask, respectively.
+
+        Args:
+            det1 (Detections):
+                The first Detections object
+            det2 (Detections):
+                The second Detections object
+
+        Returns:
+            Detections: A new Detections object, with merged attributes.
+
+        Raises:
+            ValueError: If the input Detections objects do not have exactly 1 detected
+                object.
+
+        Example:
+            ```python
+            import cv2
+            import supervision as sv
+            from inference import get_model
+
+            image = cv2.imread(<SOURCE_IMAGE_PATH>)
+            model = get_model(model_id="yolov8s-640")
+
+            result = model.infer(image)[0]
+            detections = sv.Detections.from_inference(result)
+
+            merged_detections = merge_object_detection_pair(
+                detections[0], detections[1])
+            ```
+        """
+        if len(det1) != 1 or len(det2) != 1:
+            raise ValueError(
+                "Both Detections should have exactly 1 detected object.")
+
+        if det2.confidence is None:
+            winning_det = det1
+        elif det1.confidence is None:
+            winning_det = det2
+        elif det1.confidence[0] >= det2.confidence[0]:
+            winning_det = det1
+        else:
+            winning_det = det2
+
+        area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * (
+            det1.xyxy[0][3] - det1.xyxy[0][1]
+        )
+        area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * (
+            det2.xyxy[0][3] - det2.xyxy[0][1]
+        )
+
+        merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2])
+        merged_x2, merged_y2 = np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:])
+
+        merged_xy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]])
+
+        winning_class_id = winning_det.class_id
+
+        if det1.confidence is None or det2.confidence is None:
+            merged_confidence = None
+        else:
+            merged_confidence = (
+                area_det1 * det1.confidence[0] + area_det2 * det2.confidence[0]
+            ) / (area_det1 + area_det2)
+            merged_confidence = np.array([merged_confidence])
+
+        merged_mask = None
+        if det1.mask is not None and det2.mask is not None:
+            merged_mask = np.logical_or(det1.mask, det2.mask)
+
+        winning_tracker_id = winning_det.tracker_id
+
+        winning_data = None
+        if det1.data and det2.data:
+            winning_data = winning_det.data
+
+        return Detections(
+            xyxy=merged_xy,
+            mask=merged_mask,
+            confidence=merged_confidence,
+            class_id=winning_class_id,
+            tracker_id=winning_tracker_id,
+            data=winning_data,
+        )

From 204669b08c650378cb03553c55ec417975a4371e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 6 May 2024 14:25:13 +0000
Subject: [PATCH 09/26] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?=
 =?UTF-8?q?=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/detection/core.py | 57 ++++++++++++-----------------------
 1 file changed, 19 insertions(+), 38 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 501a27e9d..beb68923d 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -198,8 +198,7 @@ def from_yolov5(cls, yolov5_results) -> Detections:
             detections = sv.Detections.from_yolov5(result)
             ```
         """
-        yolov5_detections_predictions = yolov5_results.pred[0].cpu(
-        ).cpu().numpy()
+        yolov5_detections_predictions = yolov5_results.pred[0].cpu().cpu().numpy()
 
         return cls(
             xyxy=yolov5_detections_predictions[:, :4],
@@ -246,8 +245,7 @@ def from_ultralytics(cls, ultralytics_results) -> Detections:
 
         if "obb" in ultralytics_results and ultralytics_results.obb is not None:
             class_id = ultralytics_results.obb.cls.cpu().numpy().astype(int)
-            class_names = np.array(
-                [ultralytics_results.names[i] for i in class_id])
+            class_names = np.array([ultralytics_results.names[i] for i in class_id])
             oriented_box_coordinates = ultralytics_results.obb.xyxyxyxy.cpu().numpy()
             return cls(
                 xyxy=ultralytics_results.obb.xyxy.cpu().numpy(),
@@ -263,8 +261,7 @@ def from_ultralytics(cls, ultralytics_results) -> Detections:
             )
 
         class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)
-        class_names = np.array([ultralytics_results.names[i]
-                               for i in class_id])
+        class_names = np.array([ultralytics_results.names[i] for i in class_id])
         return cls(
             xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(),
             confidence=ultralytics_results.boxes.conf.cpu().numpy(),
@@ -352,8 +349,7 @@ def from_tensorflow(
         return cls(
             xyxy=boxes,
             confidence=tensorflow_results["detection_scores"][0].numpy(),
-            class_id=tensorflow_results["detection_classes"][0].numpy().astype(
-                int),
+            class_id=tensorflow_results["detection_classes"][0].numpy().astype(int),
         )
 
     @classmethod
@@ -390,8 +386,7 @@ def from_deepsparse(cls, deepsparse_results) -> Detections:
         return cls(
             xyxy=np.array(deepsparse_results.boxes[0]),
             confidence=np.array(deepsparse_results.scores[0]),
-            class_id=np.array(deepsparse_results.labels[0]).astype(
-                float).astype(int),
+            class_id=np.array(deepsparse_results.labels[0]).astype(float).astype(int),
         )
 
     @classmethod
@@ -478,29 +473,24 @@ class names. If provided, the resulting Detections object will contain
             Class names values can be accessed using `detections["class_name"]`.
         """  # noqa: E501 // docs
 
-        class_ids = transformers_results["labels"].cpu(
-        ).detach().numpy().astype(int)
+        class_ids = transformers_results["labels"].cpu().detach().numpy().astype(int)
         data = {}
         if id2label is not None:
-            class_names = np.array([id2label[class_id]
-                                   for class_id in class_ids])
+            class_names = np.array([id2label[class_id] for class_id in class_ids])
             data[CLASS_NAME_DATA_FIELD] = class_names
         if "boxes" in transformers_results:
             return cls(
                 xyxy=transformers_results["boxes"].cpu().detach().numpy(),
-                confidence=transformers_results["scores"].cpu(
-                ).detach().numpy(),
+                confidence=transformers_results["scores"].cpu().detach().numpy(),
                 class_id=class_ids,
                 data=data,
             )
         elif "masks" in transformers_results:
-            masks = transformers_results["masks"].cpu(
-            ).detach().numpy().astype(bool)
+            masks = transformers_results["masks"].cpu().detach().numpy().astype(bool)
             return cls(
                 xyxy=mask_to_xyxy(masks),
                 mask=masks,
-                confidence=transformers_results["scores"].cpu(
-                ).detach().numpy(),
+                confidence=transformers_results["scores"].cpu().detach().numpy(),
                 class_id=class_ids,
                 data=data,
             )
@@ -543,8 +533,7 @@ class IDs, and confidences of the predictions.
         """
 
         return cls(
-            xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu(
-            ).numpy(),
+            xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu().numpy(),
             confidence=detectron2_results["instances"].scores.cpu().numpy(),
             class_id=detectron2_results["instances"]
             .pred_classes.cpu()
@@ -587,8 +576,7 @@ def from_inference(cls, roboflow_result: Union[dict, Any]) -> Detections:
             Class names values can be accessed using `detections["class_name"]`.
         """
         with suppress(AttributeError):
-            roboflow_result = roboflow_result.dict(
-                exclude_none=True, by_alias=True)
+            roboflow_result = roboflow_result.dict(exclude_none=True, by_alias=True)
         xyxy, confidence, class_id, masks, trackers, data = process_roboflow_result(
             roboflow_result=roboflow_result
         )
@@ -680,8 +668,7 @@ def from_sam(cls, sam_result: List[dict]) -> Detections:
         )
 
         xywh = np.array([mask["bbox"] for mask in sorted_generated_masks])
-        mask = np.array([mask["segmentation"]
-                        for mask in sorted_generated_masks])
+        mask = np.array([mask["segmentation"] for mask in sorted_generated_masks])
 
         if np.asarray(xywh).shape[0] == 0:
             return cls.empty()
@@ -908,8 +895,7 @@ def stack_or_none(name: str):
             if all(d.__getattribute__(name) is None for d in detections_list):
                 return None
             if any(d.__getattribute__(name) is None for d in detections_list):
-                raise ValueError(
-                    f"All or none of the '{name}' fields must be None")
+                raise ValueError(f"All or none of the '{name}' fields must be None")
             return (
                 np.vstack([d.__getattribute__(name) for d in detections_list])
                 if name == "mask"
@@ -1095,8 +1081,7 @@ def _set_at_index(self, index: int, other: Detections):
             ValueError: If `other` is not made of exactly one element.
         """
         if len(other) != 1:
-            raise ValueError(
-                "Detection to set from must have exactly one element.")
+            raise ValueError("Detection to set from must have exactly one element.")
 
         self.xyxy[index] = other.xyxy[0]
         if self.mask is not None and other.mask is not None:
@@ -1168,8 +1153,7 @@ def with_nms(
         ), "Detections confidence must be given for NMS to be executed."
 
         if class_agnostic:
-            predictions = np.hstack(
-                (self.xyxy, self.confidence.reshape(-1, 1)))
+            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
         else:
             assert self.class_id is not None, (
                 "Detections class_id must be given for NMS to be executed. If you"
@@ -1225,8 +1209,7 @@ def with_nmm(
         ), "Detections confidence must be given for NMM to be executed."
 
         if class_agnostic:
-            predictions = np.hstack(
-                (self.xyxy, self.confidence.reshape(-1, 1)))
+            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
             keep_to_merge_list = non_max_merge(predictions, threshold)
         else:
             assert self.class_id is not None, (
@@ -1245,8 +1228,7 @@ def with_nmm(
         result = []
         for keep_ind, merge_ind_list in keep_to_merge_list.items():
             for merge_ind in merge_ind_list:
-                box_iou = box_iou_batch(
-                    self[keep_ind].xyxy, self[merge_ind].xyxy)[0]
+                box_iou = box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy)[0]
                 if box_iou > threshold:
                     merged_detection = self._merge_object_detection_pair(
                         self[keep_ind], self[merge_ind]
@@ -1301,8 +1283,7 @@ def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detectio
             ```
         """
         if len(det1) != 1 or len(det2) != 1:
-            raise ValueError(
-                "Both Detections should have exactly 1 detected object.")
+            raise ValueError("Both Detections should have exactly 1 detected object.")
 
         if det2.confidence is None:
             winning_det = det1

From c3b77d05c09f4a0192fb48aa95ab6ef701c557ed Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Tue, 14 May 2024 17:12:19 +0300
Subject: [PATCH 10/26] Rename, remove functions, unit-test & change
 `merge_object_detection_pair`

---
 supervision/detection/core.py  | 176 ++++++++++++++++-----------------
 supervision/detection/utils.py |  29 +-----
 test/detection/test_core.py    | 129 +++++++++++++++++++++++-
 3 files changed, 219 insertions(+), 115 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index d56ba5160..0777571fc 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -8,8 +8,9 @@
 
 from supervision.config import CLASS_NAME_DATA_FIELD, ORIENTED_BOX_COORDINATES
 from supervision.detection.utils import (
-    batch_non_max_merge,
+    box_batch_non_max_merge,
     box_iou_batch,
+    box_non_max_merge,
     box_non_max_suppression,
     calculate_masks_centroids,
     extract_ultralytics_masks,
@@ -18,7 +19,6 @@
     mask_non_max_suppression,
     mask_to_xyxy,
     merge_data,
-    non_max_merge,
     process_roboflow_result,
     xywh_to_xyxy,
 )
@@ -1213,7 +1213,7 @@ def with_nmm(
 
         if class_agnostic:
             predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
-            keep_to_merge_list = non_max_merge(predictions, threshold)
+            keep_to_merge_list = box_non_max_merge(predictions, threshold)
         else:
             assert self.class_id is not None, (
                 "Detections class_id must be given for NMS to be executed. If you"
@@ -1226,14 +1226,14 @@ def with_nmm(
                     self.class_id.reshape(-1, 1),
                 )
             )
-            keep_to_merge_list = batch_non_max_merge(predictions, threshold)
+            keep_to_merge_list = box_batch_non_max_merge(predictions, threshold)
 
         result = []
         for keep_ind, merge_ind_list in keep_to_merge_list.items():
             for merge_ind in merge_ind_list:
                 box_iou = box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy)[0]
                 if box_iou > threshold:
-                    merged_detection = self._merge_object_detection_pair(
+                    merged_detection = self.merge_object_detection_pair(
                         self[keep_ind], self[merge_ind]
                     )
                     self._set_at_index(keep_ind, merged_detection)
@@ -1241,99 +1241,95 @@ def with_nmm(
 
         return Detections.merge(result)
 
-    @staticmethod
-    def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections:
-        """
-        Merges two Detections object into a single Detections object.
-        Assumes each Detections contains exactly one object.
-
-        A `winning` detection is determined based on the confidence score of the two
-        input detections. This winning detection is then used to specify which
-        `class_id`, `tracker_id`, and `data` to include in the merged Detections object.
-
-        The resulting `confidence` of the merged object is calculated by the weighted
-        contribution of each detection to the merged object.
-        The bounding boxes and masks of the two input detections are merged into a
-        single bounding box and mask, respectively.
-
-        Args:
-            det1 (Detections):
-                The first Detections object
-            det2 (Detections):
-                The second Detections object
-
-        Returns:
-            Detections: A new Detections object, with merged attributes.
-
-        Raises:
-            ValueError: If the input Detections objects do not have exactly 1 detected
-                object.
-
-        Example:
-            ```python
-            import cv2
-            import supervision as sv
-            from inference import get_model
 
-            image = cv2.imread(<SOURCE_IMAGE_PATH>)
-            model = get_model(model_id="yolov8s-640")
-
-            result = model.infer(image)[0]
-            detections = sv.Detections.from_inference(result)
+def merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections:
+    """
+    Merges two Detections object into a single Detections object.
+    Assumes each Detections contains exactly one object.
 
-            merged_detections = merge_object_detection_pair(
-                detections[0], detections[1])
-            ```
-        """
-        if len(det1) != 1 or len(det2) != 1:
-            raise ValueError("Both Detections should have exactly 1 detected object.")
-
-        if det2.confidence is None:
-            winning_det = det1
-        elif det1.confidence is None:
-            winning_det = det2
-        elif det1.confidence[0] >= det2.confidence[0]:
-            winning_det = det1
-        else:
-            winning_det = det2
+    A `winning` detection is determined based on the confidence score of the two
+    input detections. This winning detection is then used to specify which
+    `class_id`, `tracker_id`, and `data` to include in the merged Detections object.
 
-        area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * (
-            det1.xyxy[0][3] - det1.xyxy[0][1]
-        )
-        area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * (
-            det2.xyxy[0][3] - det2.xyxy[0][1]
-        )
+    The resulting `confidence` of the merged object is calculated by the weighted
+    contribution of each detection to the merged object.
+    The bounding boxes and masks of the two input detections are merged into a
+    single bounding box and mask, respectively.
 
-        merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2])
-        merged_x2, merged_y2 = np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:])
+    Args:
+        det1 (Detections):
+            The first Detections object
+        det2 (Detections):
+            The second Detections object
 
-        merged_xy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]])
+    Returns:
+        Detections: A new Detections object, with merged attributes.
 
-        winning_class_id = winning_det.class_id
+    Raises:
+        ValueError: If the input Detections objects do not have exactly 1 detected
+            object.
 
-        if det1.confidence is None or det2.confidence is None:
-            merged_confidence = None
-        else:
-            merged_confidence = (
-                area_det1 * det1.confidence[0] + area_det2 * det2.confidence[0]
-            ) / (area_det1 + area_det2)
-            merged_confidence = np.array([merged_confidence])
+    Example:
+        ```python
+        import cv2
+        import supervision as sv
+        from inference import get_model
 
-        merged_mask = None
-        if det1.mask is not None and det2.mask is not None:
-            merged_mask = np.logical_or(det1.mask, det2.mask)
+        image = cv2.imread(<SOURCE_IMAGE_PATH>)
+        model = get_model(model_id="yolov8s-640")
 
-        winning_tracker_id = winning_det.tracker_id
+        result = model.infer(image)[0]
+        detections = sv.Detections.from_inference(result)
 
-        winning_data = None
-        if det1.data and det2.data:
-            winning_data = winning_det.data
+        merged_detections = merge_object_detection_pair(
+            detections[0], detections[1])
+        ```
+    """
+    if len(det1) != 1 or len(det2) != 1:
+        raise ValueError("Both Detections should have exactly 1 detected object.")
+
+    if det2.confidence is None:
+        winning_det = det1
+    elif det1.confidence is None:
+        winning_det = det2
+    elif det1.confidence[0] >= det2.confidence[0]:
+        winning_det = det1
+    else:
+        winning_det = det2
+
+    area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * (
+        det1.xyxy[0][3] - det1.xyxy[0][1]
+    )
+    area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * (
+        det2.xyxy[0][3] - det2.xyxy[0][1]
+    )
 
-        return Detections(
-            xyxy=merged_xy,
-            mask=merged_mask,
-            confidence=merged_confidence,
-            class_id=winning_class_id,
-            tracker_id=winning_tracker_id,
-            data=winning_data,
-        )
+    merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2])
+    merged_x2, merged_y2 = np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:])
+    merged_xy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]])
+
+    if det2.mask is None or det1.mask is None:
+        merged_mask = winning_det.mask
+    else:
+        merged_mask = np.logical_or(det1.mask, det2.mask)
+
+    if det1.confidence is None or det2.confidence is None:
+        merged_confidence = winning_det.confidence
+    else:
+        merged_confidence = (
+            area_det1 * det1.confidence[0] + area_det2 * det2.confidence[0]
+        ) / (area_det1 + area_det2)
+        merged_confidence = np.array([merged_confidence])
+
+    winning_class_id = winning_det.class_id
+    winning_tracker_id = winning_det.tracker_id
+    winning_data = winning_det.data
+
+    return Detections(
+        xyxy=merged_xy,
+        mask=merged_mask,
+        confidence=merged_confidence,
+        class_id=winning_class_id,
+        tracker_id=winning_tracker_id,
+        data=winning_data,
+    )
diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index d2e403a49..bd20ab37d 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -274,7 +274,7 @@ def box_non_max_suppression(
     return keep[sort_index.argsort()]
 
 
-def non_max_merge(
+def box_non_max_merge(
     predictions: np.ndarray, threshold: float = 0.5
 ) -> Dict[int, List[int]]:
     """
@@ -353,7 +353,7 @@ def non_max_merge(
     return keep_to_merge_list
 
 
-def batch_non_max_merge(
+def box_batch_non_max_merge(
     predictions: np.ndarray, threshold: float = 0.5
 ) -> Dict[int, List[int]]:
     """
@@ -375,7 +375,9 @@ def batch_non_max_merge(
     keep_to_merge_list = {}
     for category_id in np.unique(category_ids):
         curr_indices = np.where(category_ids == category_id)[0]
-        curr_keep_to_merge_list = non_max_merge(predictions[curr_indices], threshold)
+        curr_keep_to_merge_list = box_non_max_merge(
+            predictions[curr_indices], threshold
+        )
         curr_indices_list = curr_indices.tolist()
         for curr_keep, curr_merge_list in curr_keep_to_merge_list.items():
             keep = curr_indices_list[curr_keep]
@@ -384,27 +386,6 @@ def batch_non_max_merge(
     return keep_to_merge_list
 
 
-def get_merged_bbox(bbox1: np.ndarray, bbox2: np.ndarray) -> np.ndarray:
-    """
-    Merges two bounding boxes into one.
-
-    Args:
-        bbox1 (np.ndarray): A numpy array of shape `(, 4)` where the
-            row corresponds to a bounding box in
-            the format `(x_min, y_min, x_max, y_max)`.
-        bbox2 (np.ndarray): A numpy array of shape `(, 4)` where the
-            row corresponds to a bounding box in
-            the format `(x_min, y_min, x_max, y_max)`.
-
-    Returns:
-        np.ndarray: A numpy array of shape `(, 4)` where the new
-            bounding box is the merged bounding box of `bbox1` and `bbox2`.
-    """
-    left_top = np.minimum(bbox1[0][:2], bbox2[0][:2])
-    right_bottom = np.maximum(bbox1[0][2:], bbox2[0][2:])
-    return np.array([np.concatenate([left_top, right_bottom])])
-
-
 def clip_boxes(xyxy: np.ndarray, resolution_wh: Tuple[int, int]) -> np.ndarray:
     """
     Clips bounding boxes coordinates to fit within the frame resolution.
diff --git a/test/detection/test_core.py b/test/detection/test_core.py
index 12f3de281..31e56decd 100644
--- a/test/detection/test_core.py
+++ b/test/detection/test_core.py
@@ -5,7 +5,7 @@
 import numpy as np
 import pytest
 
-from supervision.detection.core import Detections
+from supervision.detection.core import Detections, merge_object_detection_pair
 from supervision.geometry.core import Position
 
 PREDICTIONS = np.array(
@@ -421,3 +421,130 @@ def test_equal(
     detections_a: Detections, detections_b: Detections, expected_result: bool
 ) -> None:
     assert (detections_a == detections_b) == expected_result
+
+
+@pytest.mark.parametrize(
+    "detection_1, detection_2, expected_result, exception",
+    [
+        (
+            mock_detections(
+                xyxy=[[10, 10, 30, 30]],
+            ),
+            mock_detections(
+                xyxy=[[10, 10, 30, 30]],
+            ),
+            mock_detections(
+                xyxy=[[10, 10, 30, 30]],
+            ),
+            DoesNotRaise(),
+        ),  # Merge with self
+        (
+            mock_detections(
+                xyxy=[[10, 10, 30, 30]],
+            ),
+            Detections.empty(),
+            None,
+            pytest.raises(ValueError),
+        ),  # merge with empty: error
+        (
+            mock_detections(
+                xyxy=[[10, 10, 30, 30]],
+            ),
+            mock_detections(
+                xyxy=[[10, 10, 30, 30], [40, 40, 60, 60]],
+            ),
+            None,
+            pytest.raises(ValueError),
+        ),  # merge with 2+ objects: error
+        (
+            mock_detections(
+                xyxy=[[10, 10, 30, 30]],
+                confidence=[0.1],
+                class_id=[1],
+                mask=[np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=bool)],
+                tracker_id=[1],
+                data={"key_1": [1]},
+            ),
+            mock_detections(
+                xyxy=[[20, 20, 40, 40]],
+                confidence=[0.1],
+                class_id=[2],
+                mask=[np.array([[0, 0, 0], [0, 1, 1], [0, 1, 1]], dtype=bool)],
+                tracker_id=[2],
+                data={"key_2": [2]},
+            ),
+            mock_detections(
+                xyxy=[[10, 10, 40, 40]],
+                confidence=[0.1],
+                class_id=[1],
+                mask=[np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]], dtype=bool)],
+                tracker_id=[1],
+                data={"key_1": [1]},
+            ),
+            DoesNotRaise(),
+        ),  # Same confidence - merge box & mask, tiebreak to detection_1
+        (
+            mock_detections(
+                xyxy=[[0, 0, 20, 20]],
+                confidence=[0.1],
+                class_id=[1],
+                mask=[np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=bool)],
+                tracker_id=[1],
+                data={"key_1": [1]},
+            ),
+            mock_detections(
+                xyxy=[[10, 10, 50, 50]],
+                confidence=[0.2],
+                class_id=[2],
+                mask=[np.array([[0, 0, 0], [0, 1, 1], [0, 1, 1]], dtype=bool)],
+                tracker_id=[2],
+                data={"key_2": [2]},
+            ),
+            mock_detections(
+                xyxy=[[0, 0, 50, 50]],
+                confidence=[(1 * 0.1 + 4 * 0.2) / 5],
+                class_id=[2],
+                mask=[np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]], dtype=bool)],
+                tracker_id=[2],
+                data={"key_2": [2]},
+            ),
+            DoesNotRaise(),
+        ),  # Different confidence, different area
+        (
+            mock_detections(
+                xyxy=[[0, 0, 20, 20]],
+                confidence=None,
+                class_id=[1],
+                mask=[np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=bool)],
+                tracker_id=[1],
+                data={"key_1": [1]},
+            ),
+            mock_detections(
+                xyxy=[[10, 10, 30, 30]],
+                confidence=[0.2],
+                class_id=[2],
+                mask=[np.array([[0, 0, 0], [0, 1, 1], [0, 1, 1]], dtype=bool)],
+                tracker_id=[2],
+                data={"key_2": [2]},
+            ),
+            mock_detections(
+                xyxy=[[0, 0, 30, 30]],
+                confidence=[0.2],
+                class_id=[2],
+                mask=[np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]], dtype=bool)],
+                tracker_id=[2],
+                data={"key_2": [2]},
+            ),
+            DoesNotRaise(),
+        ),  # merge with no confidence
+    ],
+)
+def test_merge_object_detection_pair(
+    detection_1: Detections,
+    detection_2: Detections,
+    expected_result: Optional[Detections],
+    exception: Exception,
+):
+    with exception:
+        result = merge_object_detection_pair(detection_1, detection_2)
+        assert result == expected_result

From 8014e88944b9f1135448761b0c7f0832df7589ae Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Tue, 14 May 2024 17:42:47 +0300
Subject: [PATCH 11/26] Test box_non_max_merge

---
 supervision/detection/utils.py |   6 +-
 test/detection/test_utils.py   | 126 +++++++++++++++++++++++++++++++++
 2 files changed, 129 insertions(+), 3 deletions(-)

diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index bd20ab37d..f177d0886 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -275,7 +275,7 @@ def box_non_max_suppression(
 
 
 def box_non_max_merge(
-    predictions: np.ndarray, threshold: float = 0.5
+    predictions: np.ndarray, iou_threshold: float = 0.5
 ) -> Dict[int, List[int]]:
     """
     Apply greedy version of non-maximum merging to avoid detecting too many
@@ -285,7 +285,7 @@ def box_non_max_merge(
         predictions (np.ndarray): An array of shape `(n, 5)` containing
             the bounding boxes coordinates in format `[x1, y1, x2, y2]`
             and the confidence scores.
-        threshold (float, optional): The intersection-over-union threshold
+        iou_threshold (float, optional): The intersection-over-union threshold
             to use for non-maximum suppression. Defaults to 0.5.
 
     Returns:
@@ -338,7 +338,7 @@ def box_non_max_merge(
         union = (rem_areas - inter) + areas[idx]
         match_metric_value = inter / union
 
-        mask = match_metric_value < threshold
+        mask = match_metric_value < iou_threshold
         mask = mask.astype(np.uint8)
         matched_box_indices = np.flip(order[np.where(mask == 0)[0]])
         unmatched_indices = order[np.where(mask == 1)[0]]
diff --git a/test/detection/test_utils.py b/test/detection/test_utils.py
index 097c5c6e5..e6f330841 100644
--- a/test/detection/test_utils.py
+++ b/test/detection/test_utils.py
@@ -6,6 +6,7 @@
 
 from supervision.config import CLASS_NAME_DATA_FIELD
 from supervision.detection.utils import (
+    box_non_max_merge,
     box_non_max_suppression,
     calculate_masks_centroids,
     clip_boxes,
@@ -127,6 +128,131 @@ def test_box_non_max_suppression(
         assert np.array_equal(result, expected_result)
 
 
+@pytest.mark.parametrize(
+    "predictions, iou_threshold, expected_result, exception",
+    [
+        (
+            np.empty(shape=(0, 5), dtype=float),
+            0.5,
+            {},
+            DoesNotRaise(),
+        ),
+        (
+            np.array([[0, 0, 10, 10, 1.0]]),
+            0.5,
+            {0: []},
+            DoesNotRaise(),
+        ),
+        (
+            np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0]]),
+            0.5,
+            {1: [0]},
+            DoesNotRaise(),
+        ),  # High overlap, tie-break to second det
+        (
+            np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 0.99]]),
+            0.5,
+            {0: [1]},
+            DoesNotRaise(),
+        ),  # High overlap, merge to high confidence
+        (
+            np.array([[0, 0, 10, 10, 0.99], [0, 0, 9, 9, 1.0]]),
+            0.5,
+            {1: [0]},
+            DoesNotRaise(),
+        ),  # (test symmetry) High overlap, merge to high confidence
+        (
+            np.array([[0, 0, 10, 10, 0.99], [0, 0, 9, 9, 1.0]]),
+            0.5,
+            {1: [0]},
+            DoesNotRaise(),
+        ),  # (test symmetry) High overlap, merge to high confidence
+        (
+            np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0]]),
+            1.0,
+            {0: [], 1: []},
+            DoesNotRaise(),
+        ),  # High IOU required
+        (
+            np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0]]),
+            0.0,
+            {1: [0]},
+            DoesNotRaise(),
+        ),  # No IOU required
+        (
+            np.array([[0, 0, 10, 10, 1.0], [0, 0, 5, 5, 0.9]]),
+            0.25,
+            {0: [1]},
+            DoesNotRaise(),
+        ),  # Below IOU requirement
+        (
+            np.array([[0, 0, 10, 10, 1.0], [0, 0, 5, 5, 0.9]]),
+            0.26,
+            {0: [], 1: []},
+            DoesNotRaise(),
+        ),  # Above IOU requirement
+        (
+            np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0], [0, 0, 8, 8, 1.0]]),
+            0.5,
+            {2: [1, 0]},
+            DoesNotRaise(),
+        ),  # 3 boxes
+        (
+            np.array(
+                [
+                    [0, 0, 10, 10, 1.0],
+                    [0, 0, 9, 9, 1.0],
+                    [5, 5, 10, 10, 1.0],
+                    [6, 6, 10, 10, 1.0],
+                    [9, 9, 10, 10, 1.0],
+                ]
+            ),
+            0.5,
+            {1: [0], 3: [2], 4: []},
+            DoesNotRaise(),
+        ),  # 5 boxes, 2 merges, 1 separate
+        (
+            np.array(
+                [
+                    [0, 0, 2, 1, 1.0],
+                    [1, 0, 3, 1, 1.0],
+                    [2, 0, 4, 1, 1.0],
+                    [3, 0, 5, 1, 1.0],
+                    [4, 0, 6, 1, 1.0],
+                ]
+            ),
+            0.33,
+            {0: [], 2: [1], 4: [3]},
+            DoesNotRaise(),
+        ),  # sequential merge, half overlap
+        (
+            np.array(
+                [
+                    [0, 0, 2, 1, 0.9],
+                    [1, 0, 3, 1, 0.9],
+                    [2, 0, 4, 1, 1.0],
+                    [3, 0, 5, 1, 0.9],
+                    [4, 0, 6, 1, 0.9],
+                ]
+            ),
+            0.33,
+            {0: [], 2: [3, 1], 4: []},
+            DoesNotRaise(),
+        ),  # confidence
+    ],
+)
+def test_box_non_max_merge(
+    predictions: np.ndarray,
+    iou_threshold: float,
+    expected_result: Dict[int, List[int]],
+    exception: Exception,
+) -> None:
+    with exception:
+        result = box_non_max_merge(predictions=predictions, iou_threshold=iou_threshold)
+
+        assert result == expected_result
+
+
 @pytest.mark.parametrize(
     "predictions, masks, iou_threshold, expected_result, exception",
     [

From 26bafec8f732ae921fc44ac068e9ed564a067331 Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Wed, 15 May 2024 09:23:34 +0300
Subject: [PATCH 12/26] Test box_non_max_merge, rename threshold, add to __init__

---
 supervision/__init__.py        | 4 +++-
 supervision/detection/core.py  | 4 ++--
 supervision/detection/utils.py | 8 ++++----
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/supervision/__init__.py b/supervision/__init__.py
index 16de484a3..3eae2e178 100644
--- a/supervision/__init__.py
+++ b/supervision/__init__.py
@@ -35,7 +35,7 @@
     DetectionDataset,
 )
 from supervision.detection.annotate import BoxAnnotator
-from supervision.detection.core import Detections
+from supervision.detection.core import Detections, merge_object_detection_pair
 from supervision.detection.line_zone import LineZone, LineZoneAnnotator
 from supervision.detection.tools.csv_sink import CSVSink
 from supervision.detection.tools.inference_slicer import InferenceSlicer
@@ -43,7 +43,9 @@
 from supervision.detection.tools.polygon_zone import PolygonZone, PolygonZoneAnnotator
 from supervision.detection.tools.smoother import DetectionsSmoother
 from supervision.detection.utils import (
+    batch_box_non_max_merge,
     box_iou_batch,
+    box_non_max_merge,
     box_non_max_suppression,
     calculate_masks_centroids,
     clip_boxes,
diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 0777571fc..1b3a385de 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -8,7 +8,7 @@
 
 from supervision.config import CLASS_NAME_DATA_FIELD, ORIENTED_BOX_COORDINATES
 from supervision.detection.utils import (
-    box_batch_non_max_merge,
+    batch_box_non_max_merge,
     box_iou_batch,
     box_non_max_merge,
     box_non_max_suppression,
@@ -1226,7 +1226,7 @@ def with_nmm(
                     self.class_id.reshape(-1, 1),
                 )
             )
-            keep_to_merge_list = box_batch_non_max_merge(predictions, threshold)
+            keep_to_merge_list = batch_box_non_max_merge(predictions, threshold)
 
         result = []
         for keep_ind, merge_ind_list in keep_to_merge_list.items():
diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index f177d0886..c2f02c1b9 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -353,8 +353,8 @@ def box_non_max_merge(
     return keep_to_merge_list
 
 
-def box_batch_non_max_merge(
-    predictions: np.ndarray, threshold: float = 0.5
+def batch_box_non_max_merge(
+    predictions: np.ndarray, iou_threshold: float = 0.5
 ) -> Dict[int, List[int]]:
     """
     Apply greedy version of non-maximum merging per category to avoid detecting
@@ -364,7 +364,7 @@ def box_batch_non_max_merge(
         predictions (np.ndarray): An array of shape `(n, 6)` containing
             the bounding boxes coordinates in format `[x1, y1, x2, y2]`,
             the confidence scores and class_ids.
-        threshold (float, optional): The intersection-over-union threshold
+        iou_threshold (float, optional): The intersection-over-union threshold
             to use for non-maximum suppression. Defaults to 0.5.
 
     Returns:
@@ -376,7 +376,7 @@ def box_batch_non_max_merge(
     for category_id in np.unique(category_ids):
         curr_indices = np.where(category_ids == category_id)[0]
         curr_keep_to_merge_list = box_non_max_merge(
-            predictions[curr_indices], threshold
+            predictions[curr_indices], iou_threshold
         )
         curr_indices_list = curr_indices.tolist()
         for curr_keep, curr_merge_list in curr_keep_to_merge_list.items():

From d2d50fbe467ca3fec33e46619c63ac0548ced50b Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Wed, 15 May 2024 09:26:18 +0300
Subject: [PATCH 13/26] renamed bbox -> xyxy

---
 supervision/detection/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index c2f02c1b9..f6308f57a 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -458,7 +458,7 @@ def mask_to_xyxy(masks: np.ndarray) -> np.ndarray:
             `(x_min, y_min, x_max, y_max)` for each mask
     """
     n = masks.shape[0]
-    bboxes = np.zeros((n, 4), dtype=int)
+    xyxy = np.zeros((n, 4), dtype=int)
 
     for i, mask in enumerate(masks):
         rows, cols = np.where(mask)
@@ -466,9 +466,9 @@ def mask_to_xyxy(masks: np.ndarray) -> np.ndarray:
         if len(rows) > 0 and len(cols) > 0:
             x_min, x_max = np.min(cols), np.max(cols)
             y_min, y_max = np.min(rows), np.max(rows)
-            bboxes[i, :] = [x_min, y_min, x_max, y_max]
+            xyxy[i, :] = [x_min, y_min, x_max, y_max]
 
-    return bboxes
+    return xyxy
 
 
 def mask_to_polygons(mask: np.ndarray) -> List[np.ndarray]:

From 2d740bdcb6b197f6aefe7436a718191c53884042 Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Wed, 15 May 2024 09:38:58 +0300
Subject: [PATCH 14/26] fix: call merge_object_detection_pair as a function

---
 supervision/detection/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 1b3a385de..76224bb72 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -1233,7 +1233,7 @@ def with_nmm(
             for merge_ind in merge_ind_list:
                 box_iou = box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy)[0]
                 if box_iou > threshold:
-                    merged_detection = self.merge_object_detection_pair(
+                    merged_detection = merge_object_detection_pair(
                         self[keep_ind], self[merge_ind]
                     )
                     self._set_at_index(keep_ind, merged_detection)

From 145b5fe56c1b1daec6e8161fece90a5f23155c76 Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Wed, 15 May 2024 10:46:56 +0300
Subject: [PATCH 15/26] Rename batch_box_non_max_merge to
 box_non_max_merge_batch

---
 supervision/__init__.py        | 2 +-
 supervision/detection/core.py  | 4 ++--
 supervision/detection/utils.py | 8 +-------
 3 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/supervision/__init__.py b/supervision/__init__.py
index 3eae2e178..03f52086f 100644
--- a/supervision/__init__.py
+++ b/supervision/__init__.py
@@ -43,9 +43,9 @@
 from supervision.detection.tools.polygon_zone import PolygonZone, PolygonZoneAnnotator
 from supervision.detection.tools.smoother import DetectionsSmoother
 from supervision.detection.utils import (
-    batch_box_non_max_merge,
     box_iou_batch,
     box_non_max_merge,
+    box_non_max_merge_batch,
     box_non_max_suppression,
     calculate_masks_centroids,
     clip_boxes,
diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 76224bb72..2489ef801 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -8,9 +8,9 @@
 
 from supervision.config import CLASS_NAME_DATA_FIELD, ORIENTED_BOX_COORDINATES
 from supervision.detection.utils import (
-    batch_box_non_max_merge,
     box_iou_batch,
     box_non_max_merge,
+    box_non_max_merge_batch,
     box_non_max_suppression,
     calculate_masks_centroids,
     extract_ultralytics_masks,
@@ -1226,7 +1226,7 @@ def with_nmm(
                     self.class_id.reshape(-1, 1),
                 )
             )
-            keep_to_merge_list = batch_box_non_max_merge(predictions, threshold)
+            keep_to_merge_list = box_non_max_merge_batch(predictions, threshold)
 
         result = []
         for keep_ind, merge_ind_list in keep_to_merge_list.items():
diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index f6308f57a..c159de596 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -300,18 +300,12 @@ def box_non_max_merge(
     y2 = predictions[:, 3]
 
     scores = predictions[:, 4]
-
     areas = (x2 - x1) * (y2 - y1)
 
     order = scores.argsort()
 
-    keep = []
-
     while len(order) > 0:
         idx = order[-1]
-
-        keep.append(idx.tolist())
-
         order = order[:-1]
 
         if len(order) == 0:
@@ -353,7 +347,7 @@ def box_non_max_merge(
     return keep_to_merge_list
 
 
-def batch_box_non_max_merge(
+def box_non_max_merge_batch(
     predictions: np.ndarray, iou_threshold: float = 0.5
 ) -> Dict[int, List[int]]:
     """

From 6c4093526607b4b37db4f2bcb05087ef53db83ad Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Wed, 15 May 2024 11:32:30 +0300
Subject: [PATCH 16/26] box_non_max_merge: use our functions to compute iou

---
 supervision/detection/utils.py | 35 +++++-----------------------------
 1 file changed, 5 insertions(+), 30 deletions(-)

diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index c159de596..cb2545522 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -294,14 +294,7 @@ def box_non_max_merge(
     """
     keep_to_merge_list = {}
 
-    x1 = predictions[:, 0]
-    y1 = predictions[:, 1]
-    x2 = predictions[:, 2]
-    y2 = predictions[:, 3]
-
     scores = predictions[:, 4]
-    areas = (x2 - x1) * (y2 - y1)
-
     order = scores.argsort()
 
     while len(order) > 0:
@@ -312,30 +305,12 @@ def box_non_max_merge(
             keep_to_merge_list[idx.tolist()] = []
             break
 
-        xx1 = np.take(x1, axis=0, indices=order)
-        xx2 = np.take(x2, axis=0, indices=order)
-        yy1 = np.take(y1, axis=0, indices=order)
-        yy2 = np.take(y2, axis=0, indices=order)
-
-        xx1 = np.maximum(xx1, x1[idx])
-        yy1 = np.maximum(yy1, y1[idx])
-        xx2 = np.minimum(xx2, x2[idx])
-        yy2 = np.minimum(yy2, y2[idx])
-
-        w = np.maximum(0, xx2 - xx1)
-        h = np.maximum(0, yy2 - yy1)
-
-        inter = w * h
-
-        rem_areas = np.take(areas, axis=0, indices=order)
-
-        union = (rem_areas - inter) + areas[idx]
-        match_metric_value = inter / union
+        candidate = np.expand_dims(predictions[idx], axis=0)
+        ious = box_iou_batch(predictions[order][:, :4], candidate[:, :4])
 
-        mask = match_metric_value < iou_threshold
-        mask = mask.astype(np.uint8)
-        matched_box_indices = np.flip(order[np.where(mask == 0)[0]])
-        unmatched_indices = order[np.where(mask == 1)[0]]
+        mask = ious < iou_threshold
+        matched_box_indices = np.flip(order[np.where(mask is False)[0]])
+        unmatched_indices = order[np.where(mask is True)[0]]
 
         order = unmatched_indices[scores[unmatched_indices].argsort()]
 

From 53f345e91614a72b20a1f19c04d5369fa17a26ed Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Wed, 15 May 2024 11:35:59 +0300
Subject: [PATCH 17/26] Minor renaming

---
 supervision/detection/utils.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index cb2545522..7985c7391 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -299,18 +299,18 @@ def box_non_max_merge(
 
     while len(order) > 0:
         idx = order[-1]
-        order = order[:-1]
+        merge_candidate = np.expand_dims(predictions[idx], axis=0)
 
+        order = order[:-1]
         if len(order) == 0:
             keep_to_merge_list[idx.tolist()] = []
             break
 
-        candidate = np.expand_dims(predictions[idx], axis=0)
-        ious = box_iou_batch(predictions[order][:, :4], candidate[:, :4])
+        ious = box_iou_batch(predictions[order][:, :4], merge_candidate[:, :4])
 
-        mask = ious < iou_threshold
-        matched_box_indices = np.flip(order[np.where(mask is False)[0]])
-        unmatched_indices = order[np.where(mask is True)[0]]
+        below_threshold = ious < iou_threshold
+        matched_box_indices = np.flip(order[np.where(below_threshold is False)[0]])
+        unmatched_indices = order[np.where(below_threshold is True)[0]]
 
         order = unmatched_indices[scores[unmatched_indices].argsort()]
 

From 0e2eec08c8ed9ccc4ae21f63ca8a6f3ae658ca94 Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Wed, 15 May 2024 11:48:48 +0300
Subject: [PATCH 18/26] Revert np.bool comparisons with `is`

* Ruff complains when `== True` is used
* Different behaviour with `is True`
---
 supervision/detection/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index 7985c7391..56420ed6e 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -308,9 +308,9 @@ def box_non_max_merge(
 
         ious = box_iou_batch(predictions[order][:, :4], merge_candidate[:, :4])
 
-        below_threshold = ious < iou_threshold
-        matched_box_indices = np.flip(order[np.where(below_threshold is False)[0]])
-        unmatched_indices = order[np.where(below_threshold is True)[0]]
+        below_threshold = (ious < iou_threshold).astype(np.uint8)
+        matched_box_indices = np.flip(order[np.where(below_threshold == 0)[0]])
+        unmatched_indices = order[np.where(below_threshold == 1)[0]]
 
         order = unmatched_indices[scores[unmatched_indices].argsort()]
 

From 559ef90d83507994091cc7d0f76fa79ce9b7a8c1 Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Wed, 15 May 2024 11:58:15 +0300
Subject: [PATCH 19/26] Simplify box_non_max_merge

---
 supervision/detection/utils.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index 56420ed6e..85b741c35 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -292,7 +292,7 @@ def box_non_max_merge(
         Dict[int, List[int]]: Mapping from prediction indices
         to keep to a list of prediction indices to be merged.
     """
-    keep_to_merge_list = {}
+    keep_to_merge_list: Dict[int, List[int]] = {}
 
     scores = predictions[:, 4]
     order = scores.argsort()
@@ -307,17 +307,11 @@ def box_non_max_merge(
             break
 
         ious = box_iou_batch(predictions[order][:, :4], merge_candidate[:, :4])
+        ious = ious.flatten()
 
-        below_threshold = (ious < iou_threshold).astype(np.uint8)
-        matched_box_indices = np.flip(order[np.where(below_threshold == 0)[0]])
-        unmatched_indices = order[np.where(below_threshold == 1)[0]]
-
-        order = unmatched_indices[scores[unmatched_indices].argsort()]
-
-        keep_to_merge_list[idx.tolist()] = []
-
-        for matched_box_ind in matched_box_indices.tolist():
-            keep_to_merge_list[idx.tolist()].append(matched_box_ind)
+        above_threshold = ious >= iou_threshold
+        keep_to_merge_list[idx] = np.flip(order[above_threshold]).tolist()
+        order = order[~above_threshold]
 
     return keep_to_merge_list
 

From f8f3647a983529aa2e7f2bff8599d33b2a7ebe83 Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Wed, 15 May 2024 15:32:26 +0300
Subject: [PATCH 20/26] Removed surplus NMM code for 20% speedup

---
 supervision/detection/core.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 2489ef801..2f358c6b7 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -8,7 +8,6 @@
 
 from supervision.config import CLASS_NAME_DATA_FIELD, ORIENTED_BOX_COORDINATES
 from supervision.detection.utils import (
-    box_iou_batch,
     box_non_max_merge,
     box_non_max_merge_batch,
     box_non_max_suppression,
@@ -1231,12 +1230,10 @@ def with_nmm(
         result = []
         for keep_ind, merge_ind_list in keep_to_merge_list.items():
             for merge_ind in merge_ind_list:
-                box_iou = box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy)[0]
-                if box_iou > threshold:
-                    merged_detection = merge_object_detection_pair(
-                        self[keep_ind], self[merge_ind]
-                    )
-                    self._set_at_index(keep_ind, merged_detection)
+                merged_detection = merge_object_detection_pair(
+                    self[keep_ind], self[merge_ind]
+                )
+                self._set_at_index(keep_ind, merged_detection)
             result.append(self[keep_ind])
 
         return Detections.merge(result)

From 9024396f6c49f5f5496dac5347859f07721e1f76 Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Fri, 17 May 2024 10:58:45 +0300
Subject: [PATCH 21/26] Add npt.NDArray[x] types, remove resolution_wh default
 val

---
 supervision/detection/utils.py | 58 +++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 26 deletions(-)

diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index 85b741c35..db33ab01d 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -3,6 +3,7 @@
 
 import cv2
 import numpy as np
+import numpy.typing as npt
 
 from supervision.config import CLASS_NAME_DATA_FIELD
 
@@ -275,14 +276,14 @@ def box_non_max_suppression(
 
 
 def box_non_max_merge(
-    predictions: np.ndarray, iou_threshold: float = 0.5
+    predictions: npt.NDArray[np.float64], iou_threshold: float = 0.5
 ) -> Dict[int, List[int]]:
     """
     Apply greedy version of non-maximum merging to avoid detecting too many
     overlapping bounding boxes for a given object.
 
     Args:
-        predictions (np.ndarray): An array of shape `(n, 5)` containing
+        predictions (npt.NDArray[np.float64]): An array of shape `(n, 5)` containing
             the bounding boxes coordinates in format `[x1, y1, x2, y2]`
             and the confidence scores.
         iou_threshold (float, optional): The intersection-over-union threshold
@@ -317,14 +318,14 @@ def box_non_max_merge(
 
 
 def box_non_max_merge_batch(
-    predictions: np.ndarray, iou_threshold: float = 0.5
+    predictions: npt.NDArray[np.float64], iou_threshold: float = 0.5
 ) -> Dict[int, List[int]]:
     """
     Apply greedy version of non-maximum merging per category to avoid detecting
     too many overlapping bounding boxes for a given object.
 
     Args:
-        predictions (np.ndarray): An array of shape `(n, 6)` containing
+        predictions (npt.NDArray[np.float64]): An array of shape `(n, 6)` containing
             the bounding boxes coordinates in format `[x1, y1, x2, y2]`,
             the confidence scores and class_ids.
         iou_threshold (float, optional): The intersection-over-union threshold
@@ -667,16 +668,18 @@ def process_roboflow_result(
     return xyxy, confidence, class_id, masks, tracker_id, data
 
 
-def move_boxes(xyxy: np.ndarray, offset: np.ndarray) -> np.ndarray:
+def move_boxes(
+    xyxy: npt.NDArray[np.float64], offset: npt.NDArray[np.int32]
+) -> npt.NDArray[np.float64]:
     """
     Parameters:
-        xyxy (np.ndarray): An array of shape `(n, 4)` containing the bounding boxes
-            coordinates in format `[x1, y1, x2, y2]`
+        xyxy (npt.NDArray[np.float64]): An array of shape `(n, 4)` containing the
+            bounding boxes coordinates in format `[x1, y1, x2, y2]`
         offset (np.array): An array of shape `(2,)` containing offset values in format
             is `[dx, dy]`.
 
     Returns:
-        np.ndarray: Repositioned bounding boxes.
+        npt.NDArray[np.float64]: Repositioned bounding boxes.
 
     Example:
         ```python
@@ -697,24 +700,25 @@ def move_boxes(xyxy: np.ndarray, offset: np.ndarray) -> np.ndarray:
 
 
 def move_masks(
-    masks: np.ndarray,
-    offset: np.ndarray,
-    resolution_wh: Tuple[int, int] = None,
-) -> np.ndarray:
+    masks: npt.NDArray[np.bool_],
+    offset: npt.NDArray[np.int32],
+    resolution_wh: Tuple[int, int],
+) -> npt.NDArray[np.bool_]:
     """
     Offset the masks in an array by the specified (x, y) amount.
 
     Args:
-        masks (np.ndarray): A 3D array of binary masks corresponding to the predictions.
-            Shape: `(N, H, W)`, where N is the number of predictions, and H, W are the
-            dimensions of each mask.
-        offset (np.ndarray): An array of shape `(2,)` containing non-negative int values
-            `[dx, dy]`.
+        masks (npt.NDArray[np.bool_]): A 3D array of binary masks corresponding to the
+            predictions. Shape: `(N, H, W)`, where N is the number of predictions, and
+            H, W are the dimensions of each mask.
+        offset (npt.NDArray[np.int32]): An array of shape `(2,)` containing non-negative
+            int values `[dx, dy]`.
         resolution_wh (Tuple[int, int]): The width and height of the desired mask
             resolution.
 
     Returns:
-        (np.ndarray) repositioned masks, optionally padded to the specified shape.
+        (npt.NDArray[np.bool_]) repositioned masks, optionally padded to the specified
+            shape.
     """
 
     if offset[0] < 0 or offset[1] < 0:
@@ -730,19 +734,21 @@ def move_masks(
     return mask_array
 
 
-def scale_boxes(xyxy: np.ndarray, factor: float) -> np.ndarray:
+def scale_boxes(
+    xyxy: npt.NDArray[np.float64], factor: float
+) -> npt.NDArray[np.float64]:
     """
     Scale the dimensions of bounding boxes.
 
     Parameters:
-        xyxy (np.ndarray): An array of shape `(n, 4)` containing the bounding boxes
-            coordinates in format `[x1, y1, x2, y2]`
+        xyxy (npt.NDArray[np.float64]): An array of shape `(n, 4)` containing the
+            bounding boxes coordinates in format `[x1, y1, x2, y2]`
         factor (float): A float value representing the factor by which the box
             dimensions are scaled. A factor greater than 1 enlarges the boxes, while a
             factor less than 1 shrinks them.
 
     Returns:
-        np.ndarray: Scaled bounding boxes.
+        npt.NDArray[np.float64]: Scaled bounding boxes.
 
     Example:
         ```python
@@ -810,19 +816,19 @@ def is_data_equal(data_a: Dict[str, np.ndarray], data_b: Dict[str, np.ndarray])
 
 
 def merge_data(
-    data_list: List[Dict[str, Union[np.ndarray, List]]],
-) -> Dict[str, Union[np.ndarray, List]]:
+    data_list: List[Dict[str, Union[npt.NDArray[np.generic], List]]],
+) -> Dict[str, Union[npt.NDArray[np.generic], List]]:
     """
     Merges the data payloads of a list of Detections instances.
 
     Args:
         data_list: The data payloads of the Detections instances. Each data payload
             is a dictionary with the same keys, and the values are either lists or
-            np.ndarray.
+            npt.NDArray[np.generic].
 
     Returns:
         A single data payload containing the merged data, preserving the original data
-            types (list or np.ndarray).
+            types (list or npt.NDArray[np.generic]).
 
     Raises:
         ValueError: If data values within a single object have different lengths or if

From 6fbca8333e373d06312e823e03ef8899208f1a7a Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Thu, 23 May 2024 16:01:34 +0300
Subject: [PATCH 22/26] Address review comments, simplify merge

* Reintroduced IoU check before merging - necessary for the algorithm
---
 supervision/__init__.py        |   3 +-
 supervision/detection/core.py  | 137 ++++++++++++++++++++++-----------
 supervision/detection/utils.py | 118 +++++++++++++++++-----------
 test/detection/test_core.py    |  77 ++++++++++++++----
 test/detection/test_utils.py   |  56 ++++++++------
 5 files changed, 261 insertions(+), 130 deletions(-)

diff --git a/supervision/__init__.py b/supervision/__init__.py
index 03f52086f..816142b90 100644
--- a/supervision/__init__.py
+++ b/supervision/__init__.py
@@ -35,7 +35,7 @@
     DetectionDataset,
 )
 from supervision.detection.annotate import BoxAnnotator
-from supervision.detection.core import Detections, merge_object_detection_pair
+from supervision.detection.core import Detections
 from supervision.detection.line_zone import LineZone, LineZoneAnnotator
 from supervision.detection.tools.csv_sink import CSVSink
 from supervision.detection.tools.inference_slicer import InferenceSlicer
@@ -45,7 +45,6 @@
 from supervision.detection.utils import (
     box_iou_batch,
     box_non_max_merge,
-    box_non_max_merge_batch,
     box_non_max_suppression,
     calculate_masks_centroids,
     clip_boxes,
diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 2f358c6b7..6abc8dadd 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -8,8 +8,8 @@
 
 from supervision.config import CLASS_NAME_DATA_FIELD, ORIENTED_BOX_COORDINATES
 from supervision.detection.utils import (
+    box_iou_batch,
     box_non_max_merge,
-    box_non_max_merge_batch,
     box_non_max_suppression,
     calculate_masks_centroids,
     extract_ultralytics_masks,
@@ -1198,24 +1198,21 @@ def with_nmm(
                 after non-maximum merging.
 
         Raises:
-            AssertionError: If `confidence` is None and class_agnostic is False.
-                If `class_id` is None and class_agnostic is False.
+            AssertionError: If `confidence` is None or `class_id` is None and
+                class_agnostic is False.
         """
         if len(self) == 0:
             return self
 
-        assert 0.0 <= threshold <= 1.0, "Threshold must be between 0 and 1."
-
         assert (
             self.confidence is not None
         ), "Detections confidence must be given for NMM to be executed."
 
         if class_agnostic:
             predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
-            keep_to_merge_list = box_non_max_merge(predictions, threshold)
         else:
             assert self.class_id is not None, (
-                "Detections class_id must be given for NMS to be executed. If you"
+                "Detections class_id must be given for NMM to be executed. If you"
                 " intended to perform class agnostic NMM set class_agnostic=True."
             )
             predictions = np.hstack(
@@ -1225,21 +1222,25 @@ def with_nmm(
                     self.class_id.reshape(-1, 1),
                 )
             )
-            keep_to_merge_list = box_non_max_merge_batch(predictions, threshold)
+
+        merge_groups = box_non_max_merge(
+            predictions=predictions, iou_threshold=threshold
+        )
 
         result = []
-        for keep_ind, merge_ind_list in keep_to_merge_list.items():
-            for merge_ind in merge_ind_list:
-                merged_detection = merge_object_detection_pair(
-                    self[keep_ind], self[merge_ind]
-                )
-                self._set_at_index(keep_ind, merged_detection)
-            result.append(self[keep_ind])
+        for merge_group in merge_groups:
+            unmerged_detections = [self[i] for i in merge_group]
+            merged_detections = _merge_inner_detections_objects(
+                unmerged_detections, threshold
+            )
+            result.append(merged_detections)
 
         return Detections.merge(result)
 
 
-def merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections:
+def _merge_inner_detection_object_pair(
+    detections_1: Detections, detections_2: Detections
+) -> Detections:
     """
     Merges two Detections object into a single Detections object.
     Assumes each Detections contains exactly one object.
@@ -1254,9 +1255,9 @@ def merge_object_detection_pair(det1: Detections, det2: Detections) -> Detection
     single bounding box and mask, respectively.
 
     Args:
-        det1 (Detections):
+        detections_1 (Detections):
             The first Detections object
-        det2 (Detections):
+        detections_2 (Detections):
             The second Detections object
 
     Returns:
@@ -1282,51 +1283,99 @@ def merge_object_detection_pair(det1: Detections, det2: Detections) -> Detection
             detections[0], detections[1])
         ```
     """
-    if len(det1) != 1 or len(det2) != 1:
+    if len(detections_1) != 1 or len(detections_2) != 1:
         raise ValueError("Both Detections should have exactly 1 detected object.")
 
-    if det2.confidence is None:
-        winning_det = det1
-    elif det1.confidence is None:
-        winning_det = det2
-    elif det1.confidence[0] >= det2.confidence[0]:
-        winning_det = det1
+    _verify_fields_both_defined_or_none(detections_1, detections_2)
+
+    if detections_1.confidence is None and detections_2.confidence is None:
+        merged_confidence = None
     else:
-        winning_det = det2
+        area_det1 = (detections_1.xyxy[0][2] - detections_1.xyxy[0][0]) * (
+            detections_1.xyxy[0][3] - detections_1.xyxy[0][1]
+        )
+        area_det2 = (detections_2.xyxy[0][2] - detections_2.xyxy[0][0]) * (
+            detections_2.xyxy[0][3] - detections_2.xyxy[0][1]
+        )
+        merged_confidence = (
+            area_det1 * detections_1.confidence[0]
+            + area_det2 * detections_2.confidence[0]
+        ) / (area_det1 + area_det2)
+        merged_confidence = np.array([merged_confidence])
 
-    area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * (
-        det1.xyxy[0][3] - det1.xyxy[0][1]
+    merged_x1, merged_y1 = np.minimum(
+        detections_1.xyxy[0][:2], detections_2.xyxy[0][:2]
     )
-    area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * (
-        det2.xyxy[0][3] - det2.xyxy[0][1]
+    merged_x2, merged_y2 = np.maximum(
+        detections_1.xyxy[0][2:], detections_2.xyxy[0][2:]
     )
+    merged_xyxy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]])
 
-    merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2])
-    merged_x2, merged_y2 = np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:])
-    merged_xy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]])
-
-    if det2.mask is None or det1.mask is None:
-        merged_mask = winning_det.mask
+    if detections_1.mask is None and detections_2.mask is None:
+        merged_mask = None
     else:
-        merged_mask = np.logical_or(det1.mask, det2.mask)
+        merged_mask = np.logical_or(detections_1.mask, detections_2.mask)
 
-    if det1.confidence is None or det2.confidence is None:
-        merged_confidence = winning_det.confidence
+    if detections_1.confidence is None and detections_2.confidence is None:
+        winning_det = detections_1
+    elif detections_1.confidence[0] >= detections_2.confidence[0]:
+        winning_det = detections_1
     else:
-        merged_confidence = (
-            area_det1 * det1.confidence[0] + area_det2 * det2.confidence[0]
-        ) / (area_det1 + area_det2)
-        merged_confidence = np.array([merged_confidence])
+        winning_det = detections_2
 
     winning_class_id = winning_det.class_id
     winning_tracker_id = winning_det.tracker_id
     winning_data = winning_det.data
 
     return Detections(
-        xyxy=merged_xy,
+        xyxy=merged_xyxy,
         mask=merged_mask,
         confidence=merged_confidence,
         class_id=winning_class_id,
         tracker_id=winning_tracker_id,
         data=winning_data,
     )
+
+
+def _merge_inner_detections_objects(
+    detections: List[Detections], threshold=0.5
+) -> Detections:
+    """
+    Given N detections each of length 1 (exactly one object inside), combine them into a
+    single detection object of length 1. The contained inner object will be the merged
+    result of all the input detections.
+
+    For example, this lets you merge N boxes into one big box, N masks into one mask,
+    etc.
+    """
+    detections_1 = detections[0]
+    for detections_2 in detections[1:]:
+        box_iou = box_iou_batch(detections_1.xyxy, detections_2.xyxy)[0]
+        if box_iou < threshold:
+            break
+        detections_1 = _merge_inner_detection_object_pair(detections_1, detections_2)
+    return detections_1
+
+
+def _verify_fields_both_defined_or_none(
+    detections_1: Detections, detections_2: Detections
+) -> None:
+    """
+    Verify that for each optional field in the Detections, both instances either have
+    the field set to None or both have it set to non-None values.
+
+    `data` field is ignored.
+
+    Raises:
+        ValueError: If one field is None and the other is not, for any of the fields.
+    """
+    attributes = ["mask", "confidence", "class_id", "tracker_id"]
+    for attribute in attributes:
+        value_1 = getattr(detections_1, attribute)
+        value_2 = getattr(detections_2, attribute)
+
+        if (value_1 is None) != (value_2 is None):
+            raise ValueError(
+                f"Field '{attribute}' should be consistently None or not None in both "
+                "Detections."
+            )
diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index db33ab01d..b8b8f7c19 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -56,7 +56,8 @@ def box_area(box):
     top_left = np.maximum(boxes_true[:, None, :2], boxes_detection[:, :2])
     bottom_right = np.minimum(boxes_true[:, None, 2:], boxes_detection[:, 2:])
 
-    area_inter = np.prod(np.clip(bottom_right - top_left, a_min=0, a_max=None), 2)
+    area_inter = np.prod(
+        np.clip(bottom_right - top_left, a_min=0, a_max=None), 2)
     return area_inter / (area_true[:, None] + area_detection - area_inter)
 
 
@@ -81,7 +82,8 @@ def _mask_iou_batch_split(
 
     masks_true_area = masks_true.sum(axis=(1, 2))
     masks_detection_area = masks_detection.sum(axis=(1, 2))
-    union_area = masks_true_area[:, None] + masks_detection_area - intersection_area
+    union_area = masks_true_area[:, None] + \
+        masks_detection_area - intersection_area
 
     return np.divide(
         intersection_area,
@@ -132,7 +134,8 @@ def mask_iou_batch(
         1,
     )
     for i in range(0, masks_true.shape[0], step):
-        ious.append(_mask_iou_batch_split(masks_true[i : i + step], masks_detection))
+        ious.append(_mask_iou_batch_split(
+            masks_true[i: i + step], masks_detection))
 
     return np.vstack(ious)
 
@@ -162,7 +165,8 @@ def resize_masks(masks: np.ndarray, max_dimension: int = 640) -> np.ndarray:
 
     resized_masks = masks[:, yv, xv]
 
-    resized_masks = resized_masks.reshape(masks.shape[0], new_height, new_width)
+    resized_masks = resized_masks.reshape(
+        masks.shape[0], new_height, new_width)
     return resized_masks
 
 
@@ -215,8 +219,9 @@ def mask_non_max_suppression(
     keep = np.ones(rows, dtype=bool)
     for i in range(rows):
         if keep[i]:
-            condition = (ious[i] > iou_threshold) & (categories[i] == categories)
-            keep[i + 1 :] = np.where(condition[i + 1 :], False, keep[i + 1 :])
+            condition = (ious[i] > iou_threshold) & (
+                categories[i] == categories)
+            keep[i + 1:] = np.where(condition[i + 1:], False, keep[i + 1:])
 
     return keep[sort_index.argsort()]
 
@@ -275,9 +280,9 @@ def box_non_max_suppression(
     return keep[sort_index.argsort()]
 
 
-def box_non_max_merge(
+def _box_non_max_merge_all(
     predictions: npt.NDArray[np.float64], iou_threshold: float = 0.5
-) -> Dict[int, List[int]]:
+) -> List[List[int]]:
     """
     Apply greedy version of non-maximum merging to avoid detecting too many
     overlapping bounding boxes for a given object.
@@ -290,64 +295,74 @@ def box_non_max_merge(
             to use for non-maximum suppression. Defaults to 0.5.
 
     Returns:
-        Dict[int, List[int]]: Mapping from prediction indices
-        to keep to a list of prediction indices to be merged.
+        List[List[int]]: Groups of prediction indices be merged.
+            Each group may have 1 or more elements.
     """
-    keep_to_merge_list: Dict[int, List[int]] = {}
+    merge_groups: List[List[int]] = []
 
     scores = predictions[:, 4]
     order = scores.argsort()
 
     while len(order) > 0:
-        idx = order[-1]
-        merge_candidate = np.expand_dims(predictions[idx], axis=0)
+        idx = int(order[-1])
 
         order = order[:-1]
         if len(order) == 0:
-            keep_to_merge_list[idx.tolist()] = []
+            merge_groups.append([idx])
             break
 
+        merge_candidate = np.expand_dims(predictions[idx], axis=0)
         ious = box_iou_batch(predictions[order][:, :4], merge_candidate[:, :4])
         ious = ious.flatten()
 
         above_threshold = ious >= iou_threshold
-        keep_to_merge_list[idx] = np.flip(order[above_threshold]).tolist()
+        merge_group = [idx] + np.flip(order[above_threshold]).tolist()
+        merge_groups.append(merge_group)
         order = order[~above_threshold]
-
-    return keep_to_merge_list
+    return merge_groups
 
 
-def box_non_max_merge_batch(
-    predictions: npt.NDArray[np.float64], iou_threshold: float = 0.5
-) -> Dict[int, List[int]]:
+def box_non_max_merge(
+    predictions: npt.NDArray[np.float64],
+    iou_threshold: float = 0.5,
+) -> List[List[int]]:
     """
     Apply greedy version of non-maximum merging per category to avoid detecting
     too many overlapping bounding boxes for a given object.
 
     Args:
-        predictions (npt.NDArray[np.float64]): An array of shape `(n, 6)` containing
-            the bounding boxes coordinates in format `[x1, y1, x2, y2]`,
-            the confidence scores and class_ids.
+        predictions (npt.NDArray[np.float64]): An array of shape `(n, 5)` or `(n, 6)`
+            containing the bounding boxes coordinates in format `[x1, y1, x2, y2]`,
+            the confidence scores and class_ids. Omit class_id column to allow
+            detections of different classes to be merged.
         iou_threshold (float, optional): The intersection-over-union threshold
             to use for non-maximum suppression. Defaults to 0.5.
 
     Returns:
-        Dict[int, List[int]]: Mapping from prediction indices
-        to keep to a list of prediction indices to be merged.
+        List[List[int]]: Groups of prediction indices be merged.
+            Each group may have 1 or more elements.
     """
+    if predictions.shape[1] == 5:
+        return _box_non_max_merge_all(predictions, iou_threshold)
+
     category_ids = predictions[:, 5]
-    keep_to_merge_list = {}
+    merge_groups = []
     for category_id in np.unique(category_ids):
         curr_indices = np.where(category_ids == category_id)[0]
-        curr_keep_to_merge_list = box_non_max_merge(
+        merge_class_groups = _box_non_max_merge_all(
             predictions[curr_indices], iou_threshold
         )
-        curr_indices_list = curr_indices.tolist()
-        for curr_keep, curr_merge_list in curr_keep_to_merge_list.items():
-            keep = curr_indices_list[curr_keep]
-            merge_list = [curr_indices_list[i] for i in curr_merge_list]
-            keep_to_merge_list[keep] = merge_list
-    return keep_to_merge_list
+
+        for merge_class_group in merge_class_groups:
+            merge_groups.append(curr_indices[merge_class_group].tolist())
+
+    for merge_group in merge_groups:
+        if len(merge_group) == 0:
+            raise ValueError(
+                f"Empty group detected when non-max-merging "
+                f"detections: {merge_groups}"
+            )
+    return merge_groups
 
 
 def clip_boxes(xyxy: np.ndarray, resolution_wh: Tuple[int, int]) -> np.ndarray:
@@ -552,7 +567,8 @@ def approximate_polygon(
     approximated_points = polygon
     while True:
         epsilon += epsilon_step
-        new_approximated_points = cv2.approxPolyDP(polygon, epsilon, closed=True)
+        new_approximated_points = cv2.approxPolyDP(
+            polygon, epsilon, closed=True)
         if len(new_approximated_points) > target_points:
             approximated_points = new_approximated_points
         else:
@@ -581,7 +597,8 @@ def extract_ultralytics_masks(yolov8_results) -> Optional[np.ndarray]:
         )
 
     top, left = int(pad[1]), int(pad[0])
-    bottom, right = int(inference_shape[0] - pad[1]), int(inference_shape[1] - pad[0])
+    bottom, right = int(
+        inference_shape[0] - pad[1]), int(inference_shape[1] - pad[0])
 
     mask_maps = []
     masks = yolov8_results.masks.data.cpu().numpy()
@@ -648,7 +665,8 @@ def process_roboflow_result(
             polygon = np.array(
                 [[point["x"], point["y"]] for point in prediction["points"]], dtype=int
             )
-            mask = polygon_to_mask(polygon, resolution_wh=(image_width, image_height))
+            mask = polygon_to_mask(
+                polygon, resolution_wh=(image_width, image_height))
             xyxy.append([x_min, y_min, x_max, y_max])
             class_id.append(prediction["class_id"])
             class_name.append(prediction["class"])
@@ -659,10 +677,12 @@ def process_roboflow_result(
 
     xyxy = np.array(xyxy) if len(xyxy) > 0 else np.empty((0, 4))
     confidence = np.array(confidence) if len(confidence) > 0 else np.empty(0)
-    class_id = np.array(class_id).astype(int) if len(class_id) > 0 else np.empty(0)
+    class_id = np.array(class_id).astype(
+        int) if len(class_id) > 0 else np.empty(0)
     class_name = np.array(class_name) if len(class_name) > 0 else np.empty(0)
     masks = np.array(masks, dtype=bool) if len(masks) > 0 else None
-    tracker_id = np.array(tracker_ids).astype(int) if len(tracker_ids) > 0 else None
+    tracker_id = np.array(tracker_ids).astype(
+        int) if len(tracker_ids) > 0 else None
     data = {CLASS_NAME_DATA_FIELD: class_name}
 
     return xyxy, confidence, class_id, masks, tracker_id, data
@@ -722,13 +742,15 @@ def move_masks(
     """
 
     if offset[0] < 0 or offset[1] < 0:
-        raise ValueError(f"Offset values must be non-negative integers. Got: {offset}")
+        raise ValueError(
+            f"Offset values must be non-negative integers. Got: {offset}")
 
-    mask_array = np.full((masks.shape[0], resolution_wh[1], resolution_wh[0]), False)
+    mask_array = np.full(
+        (masks.shape[0], resolution_wh[1], resolution_wh[0]), False)
     mask_array[
         :,
-        offset[1] : masks.shape[1] + offset[1],
-        offset[0] : masks.shape[2] + offset[0],
+        offset[1]: masks.shape[1] + offset[1],
+        offset[0]: masks.shape[2] + offset[0],
     ] = masks
 
     return mask_array
@@ -794,8 +816,10 @@ def sum_over_mask(indices: np.ndarray, axis: tuple) -> np.ndarray:
         return np.tensordot(masks, indices, axes=axis)
 
     aggregation_axis = ([1, 2], [0, 1])
-    centroid_x = sum_over_mask(horizontal_indices, aggregation_axis) / total_pixels
-    centroid_y = sum_over_mask(vertical_indices, aggregation_axis) / total_pixels
+    centroid_x = sum_over_mask(
+        horizontal_indices, aggregation_axis) / total_pixels
+    centroid_y = sum_over_mask(
+        vertical_indices, aggregation_axis) / total_pixels
 
     return np.column_stack((centroid_x, centroid_y)).astype(int)
 
@@ -873,7 +897,8 @@ def merge_data(
             elif ndim > 1:
                 merged_data[key] = np.vstack(merged_data[key])
             else:
-                raise ValueError(f"Unexpected array dimension for key '{key}'.")
+                raise ValueError(
+                    f"Unexpected array dimension for key '{key}'.")
         else:
             raise ValueError(
                 f"Inconsistent data types for key '{key}'. Only np.ndarray and list "
@@ -918,6 +943,7 @@ def get_data_item(
             else:
                 raise TypeError(f"Unsupported index type: {type(index)}")
         else:
-            raise TypeError(f"Unsupported data type for key '{key}': {type(value)}")
+            raise TypeError(
+                f"Unsupported data type for key '{key}': {type(value)}")
 
     return subset_data
diff --git a/test/detection/test_core.py b/test/detection/test_core.py
index 31e56decd..bef511e53 100644
--- a/test/detection/test_core.py
+++ b/test/detection/test_core.py
@@ -5,7 +5,7 @@
 import numpy as np
 import pytest
 
-from supervision.detection.core import Detections, merge_object_detection_pair
+from supervision.detection.core import Detections, _merge_inner_detection_object_pair
 from supervision.geometry.core import Position
 
 PREDICTIONS = np.array(
@@ -193,7 +193,8 @@
             DoesNotRaise(),
         ),  # take only first detection by index slice (1, 3)
         (DETECTIONS, 10, None, pytest.raises(IndexError)),  # index out of range
-        (DETECTIONS, [0, 2, 10], None, pytest.raises(IndexError)),  # index out of range
+        (DETECTIONS, [0, 2, 10], None, pytest.raises(
+            IndexError)),  # index out of range
         (DETECTIONS, np.array([0, 2, 10]), None, pytest.raises(IndexError)),
         (
             DETECTIONS,
@@ -482,7 +483,7 @@ def test_equal(
                 data={"key_1": [1]},
             ),
             DoesNotRaise(),
-        ),  # Same confidence - merge box & mask, tiebreak to detection_1
+        ),  # Same confidence - merge box & mask, tie-break to detection_1
         (
             mock_detections(
                 xyxy=[[0, 0, 20, 20]],
@@ -512,7 +513,7 @@ def test_equal(
         ),  # Different confidence, different area
         (
             mock_detections(
-                xyxy=[[0, 0, 20, 20]],
+                xyxy=[[10, 10, 30, 30]],
                 confidence=None,
                 class_id=[1],
                 mask=[np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=bool)],
@@ -520,31 +521,79 @@ def test_equal(
                 data={"key_1": [1]},
             ),
             mock_detections(
-                xyxy=[[10, 10, 30, 30]],
-                confidence=[0.2],
+                xyxy=[[20, 20, 40, 40]],
+                confidence=None,
                 class_id=[2],
                 mask=[np.array([[0, 0, 0], [0, 1, 1], [0, 1, 1]], dtype=bool)],
                 tracker_id=[2],
                 data={"key_2": [2]},
             ),
             mock_detections(
-                xyxy=[[0, 0, 30, 30]],
-                confidence=[0.2],
-                class_id=[2],
+                xyxy=[[10, 10, 40, 40]],
+                confidence=None,
+                class_id=[1],
                 mask=[np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]], dtype=bool)],
-                tracker_id=[2],
-                data={"key_2": [2]},
+                tracker_id=[1],
+                data={"key_1": [1]},
             ),
             DoesNotRaise(),
-        ),  # merge with no confidence
+        ),  # No confidence at all
+        (
+            mock_detections(
+                xyxy=[[0, 0, 20, 20]],
+                confidence=None,
+            ),
+            mock_detections(
+                xyxy=[[10, 10, 30, 30]],
+                confidence=[0.2],
+            ),
+            None,
+            pytest.raises(ValueError),
+        ),  # confidence: None + [x]
+            (
+            mock_detections(
+                xyxy=[[0, 0, 20, 20]],
+                mask=[np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=bool)],
+            ),
+            mock_detections(
+                xyxy=[[10, 10, 30, 30]],
+                mask=None,
+            ),
+            None,
+            pytest.raises(ValueError),
+        ),  # mask: None + [x]
+        (
+            mock_detections(
+                xyxy=[[0, 0, 20, 20]],
+                tracker_id=[1]
+            ),
+            mock_detections(
+                xyxy=[[10, 10, 30, 30]],
+                tracker_id=None,
+            ),
+            None,
+            pytest.raises(ValueError),
+        ),  # tracker_id: None + []
+        (
+            mock_detections(
+                xyxy=[[0, 0, 20, 20]],
+                class_id=[1]
+            ),
+            mock_detections(
+                xyxy=[[10, 10, 30, 30]],
+                class_id=None,
+            ),
+            None,
+            pytest.raises(ValueError),
+        )  # class_id: None + []
     ],
 )
-def test_merge_object_detection_pair(
+def test_merge_inner_detection_object_pair(
     detection_1: Detections,
     detection_2: Detections,
     expected_result: Optional[Detections],
     exception: Exception,
 ):
     with exception:
-        result = merge_object_detection_pair(detection_1, detection_2)
+        result = _merge_inner_detection_object_pair(detection_1, detection_2)
         assert result == expected_result
diff --git a/test/detection/test_utils.py b/test/detection/test_utils.py
index e6f330841..cb7537e19 100644
--- a/test/detection/test_utils.py
+++ b/test/detection/test_utils.py
@@ -6,7 +6,7 @@
 
 from supervision.config import CLASS_NAME_DATA_FIELD
 from supervision.detection.utils import (
-    box_non_max_merge,
+    _box_non_max_merge_all,
     box_non_max_suppression,
     calculate_masks_centroids,
     clip_boxes,
@@ -134,67 +134,67 @@ def test_box_non_max_suppression(
         (
             np.empty(shape=(0, 5), dtype=float),
             0.5,
-            {},
+            [],
             DoesNotRaise(),
         ),
         (
             np.array([[0, 0, 10, 10, 1.0]]),
             0.5,
-            {0: []},
+            [[0]],
             DoesNotRaise(),
         ),
         (
             np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0]]),
             0.5,
-            {1: [0]},
+            [[1, 0]],
             DoesNotRaise(),
         ),  # High overlap, tie-break to second det
         (
             np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 0.99]]),
             0.5,
-            {0: [1]},
+            [[0, 1]],
             DoesNotRaise(),
         ),  # High overlap, merge to high confidence
         (
             np.array([[0, 0, 10, 10, 0.99], [0, 0, 9, 9, 1.0]]),
             0.5,
-            {1: [0]},
+            [[1, 0]],
             DoesNotRaise(),
         ),  # (test symmetry) High overlap, merge to high confidence
         (
-            np.array([[0, 0, 10, 10, 0.99], [0, 0, 9, 9, 1.0]]),
+            np.array([[0, 0, 10, 10, 0.90], [0, 0, 9, 9, 1.0]]),
             0.5,
-            {1: [0]},
+            [[1, 0]],
             DoesNotRaise(),
         ),  # (test symmetry) High overlap, merge to high confidence
         (
             np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0]]),
             1.0,
-            {0: [], 1: []},
+            [[1], [0]],
             DoesNotRaise(),
         ),  # High IOU required
         (
             np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0]]),
             0.0,
-            {1: [0]},
+            [[1, 0]],
             DoesNotRaise(),
         ),  # No IOU required
         (
             np.array([[0, 0, 10, 10, 1.0], [0, 0, 5, 5, 0.9]]),
             0.25,
-            {0: [1]},
+            [[0, 1]],
             DoesNotRaise(),
         ),  # Below IOU requirement
         (
             np.array([[0, 0, 10, 10, 1.0], [0, 0, 5, 5, 0.9]]),
             0.26,
-            {0: [], 1: []},
+            [[0], [1]],
             DoesNotRaise(),
         ),  # Above IOU requirement
         (
             np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0], [0, 0, 8, 8, 1.0]]),
             0.5,
-            {2: [1, 0]},
+            [[2, 1, 0]],
             DoesNotRaise(),
         ),  # 3 boxes
         (
@@ -208,7 +208,7 @@ def test_box_non_max_suppression(
                 ]
             ),
             0.5,
-            {1: [0], 3: [2], 4: []},
+            [[4], [3, 2], [1, 0]],
             DoesNotRaise(),
         ),  # 5 boxes, 2 merges, 1 separate
         (
@@ -222,7 +222,7 @@ def test_box_non_max_suppression(
                 ]
             ),
             0.33,
-            {0: [], 2: [1], 4: [3]},
+            [[4, 3], [2, 1], [0]],
             DoesNotRaise(),
         ),  # sequential merge, half overlap
         (
@@ -236,7 +236,7 @@ def test_box_non_max_suppression(
                 ]
             ),
             0.33,
-            {0: [], 2: [3, 1], 4: []},
+            [[2, 3, 1], [4], [0]],
             DoesNotRaise(),
         ),  # confidence
     ],
@@ -244,11 +244,13 @@ def test_box_non_max_suppression(
 def test_box_non_max_merge(
     predictions: np.ndarray,
     iou_threshold: float,
-    expected_result: Dict[int, List[int]],
+    expected_result: List[List[int]],
     exception: Exception,
 ) -> None:
     with exception:
-        result = box_non_max_merge(predictions=predictions, iou_threshold=iou_threshold)
+        result = _box_non_max_merge_all(
+            predictions=predictions, iou_threshold=iou_threshold
+        )
 
         assert result == expected_result
 
@@ -664,7 +666,8 @@ def test_filter_polygons_by_area(
                 "image": {"width": 1000, "height": 1000},
             },
             (
-                np.array([[175.0, 275.0, 225.0, 325.0], [450.0, 450.0, 550.0, 550.0]]),
+                np.array([[175.0, 275.0, 225.0, 325.0],
+                         [450.0, 450.0, 550.0, 550.0]]),
                 np.array([0.9, 0.8]),
                 np.array([0, 7]),
                 None,
@@ -1118,8 +1121,10 @@ def test_calculate_masks_centroids(
         ),  # two data dicts with the same field name and np.array values as 2D arrays
         (
             [
-                {"test_1": np.array([1, 2, 3]), "test_2": np.array(["a", "b", "c"])},
-                {"test_1": np.array([3, 2, 1]), "test_2": np.array(["c", "b", "a"])},
+                {"test_1": np.array([1, 2, 3]),
+                 "test_2": np.array(["a", "b", "c"])},
+                {"test_1": np.array([3, 2, 1]),
+                 "test_2": np.array(["c", "b", "a"])},
             ],
             {
                 "test_1": np.array([1, 2, 3, 3, 2, 1]),
@@ -1148,8 +1153,10 @@ def test_calculate_masks_centroids(
         ),  # two data dicts with the same field name and 1D and 2D arrays values
         (
             [
-                {"test_1": np.array([1, 2, 3]), "test_2": np.array(["a", "b"])},
-                {"test_1": np.array([3, 2, 1]), "test_2": np.array(["c", "b", "a"])},
+                {"test_1": np.array([1, 2, 3]),
+                 "test_2": np.array(["a", "b"])},
+                {"test_1": np.array([3, 2, 1]),
+                 "test_2": np.array(["c", "b", "a"])},
             ],
             None,
             pytest.raises(ValueError),
@@ -1160,7 +1167,8 @@ def test_calculate_masks_centroids(
             DoesNotRaise(),
         ),  # two data dicts; one empty and one non-empty dict
         (
-            [{"test_1": [], "test_2": []}, {"test_1": [1, 2, 3], "test_2": [1, 2, 3]}],
+            [{"test_1": [], "test_2": []}, {
+                "test_1": [1, 2, 3], "test_2": [1, 2, 3]}],
             {"test_1": [1, 2, 3], "test_2": [1, 2, 3]},
             DoesNotRaise(),
         ),  # two data dicts; one empty and one non-empty dict; same keys

From db1b4737fec31de88de5c0f946faf95a4ca88372 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 23 May 2024 13:04:09 +0000
Subject: [PATCH 23/26] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?=
 =?UTF-8?q?=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/detection/utils.py | 54 ++++++++++++----------------------
 test/detection/test_core.py    | 17 ++++-------
 test/detection/test_utils.py   | 18 ++++--------
 3 files changed, 30 insertions(+), 59 deletions(-)

diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index b8b8f7c19..4beea2ed5 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -56,8 +56,7 @@ def box_area(box):
     top_left = np.maximum(boxes_true[:, None, :2], boxes_detection[:, :2])
     bottom_right = np.minimum(boxes_true[:, None, 2:], boxes_detection[:, 2:])
 
-    area_inter = np.prod(
-        np.clip(bottom_right - top_left, a_min=0, a_max=None), 2)
+    area_inter = np.prod(np.clip(bottom_right - top_left, a_min=0, a_max=None), 2)
     return area_inter / (area_true[:, None] + area_detection - area_inter)
 
 
@@ -82,8 +81,7 @@ def _mask_iou_batch_split(
 
     masks_true_area = masks_true.sum(axis=(1, 2))
     masks_detection_area = masks_detection.sum(axis=(1, 2))
-    union_area = masks_true_area[:, None] + \
-        masks_detection_area - intersection_area
+    union_area = masks_true_area[:, None] + masks_detection_area - intersection_area
 
     return np.divide(
         intersection_area,
@@ -134,8 +132,7 @@ def mask_iou_batch(
         1,
     )
     for i in range(0, masks_true.shape[0], step):
-        ious.append(_mask_iou_batch_split(
-            masks_true[i: i + step], masks_detection))
+        ious.append(_mask_iou_batch_split(masks_true[i : i + step], masks_detection))
 
     return np.vstack(ious)
 
@@ -165,8 +162,7 @@ def resize_masks(masks: np.ndarray, max_dimension: int = 640) -> np.ndarray:
 
     resized_masks = masks[:, yv, xv]
 
-    resized_masks = resized_masks.reshape(
-        masks.shape[0], new_height, new_width)
+    resized_masks = resized_masks.reshape(masks.shape[0], new_height, new_width)
     return resized_masks
 
 
@@ -219,9 +215,8 @@ def mask_non_max_suppression(
     keep = np.ones(rows, dtype=bool)
     for i in range(rows):
         if keep[i]:
-            condition = (ious[i] > iou_threshold) & (
-                categories[i] == categories)
-            keep[i + 1:] = np.where(condition[i + 1:], False, keep[i + 1:])
+            condition = (ious[i] > iou_threshold) & (categories[i] == categories)
+            keep[i + 1 :] = np.where(condition[i + 1 :], False, keep[i + 1 :])
 
     return keep[sort_index.argsort()]
 
@@ -567,8 +562,7 @@ def approximate_polygon(
     approximated_points = polygon
     while True:
         epsilon += epsilon_step
-        new_approximated_points = cv2.approxPolyDP(
-            polygon, epsilon, closed=True)
+        new_approximated_points = cv2.approxPolyDP(polygon, epsilon, closed=True)
         if len(new_approximated_points) > target_points:
             approximated_points = new_approximated_points
         else:
@@ -597,8 +591,7 @@ def extract_ultralytics_masks(yolov8_results) -> Optional[np.ndarray]:
         )
 
     top, left = int(pad[1]), int(pad[0])
-    bottom, right = int(
-        inference_shape[0] - pad[1]), int(inference_shape[1] - pad[0])
+    bottom, right = int(inference_shape[0] - pad[1]), int(inference_shape[1] - pad[0])
 
     mask_maps = []
     masks = yolov8_results.masks.data.cpu().numpy()
@@ -665,8 +658,7 @@ def process_roboflow_result(
             polygon = np.array(
                 [[point["x"], point["y"]] for point in prediction["points"]], dtype=int
             )
-            mask = polygon_to_mask(
-                polygon, resolution_wh=(image_width, image_height))
+            mask = polygon_to_mask(polygon, resolution_wh=(image_width, image_height))
             xyxy.append([x_min, y_min, x_max, y_max])
             class_id.append(prediction["class_id"])
             class_name.append(prediction["class"])
@@ -677,12 +669,10 @@ def process_roboflow_result(
 
     xyxy = np.array(xyxy) if len(xyxy) > 0 else np.empty((0, 4))
     confidence = np.array(confidence) if len(confidence) > 0 else np.empty(0)
-    class_id = np.array(class_id).astype(
-        int) if len(class_id) > 0 else np.empty(0)
+    class_id = np.array(class_id).astype(int) if len(class_id) > 0 else np.empty(0)
     class_name = np.array(class_name) if len(class_name) > 0 else np.empty(0)
     masks = np.array(masks, dtype=bool) if len(masks) > 0 else None
-    tracker_id = np.array(tracker_ids).astype(
-        int) if len(tracker_ids) > 0 else None
+    tracker_id = np.array(tracker_ids).astype(int) if len(tracker_ids) > 0 else None
     data = {CLASS_NAME_DATA_FIELD: class_name}
 
     return xyxy, confidence, class_id, masks, tracker_id, data
@@ -742,15 +732,13 @@ def move_masks(
     """
 
     if offset[0] < 0 or offset[1] < 0:
-        raise ValueError(
-            f"Offset values must be non-negative integers. Got: {offset}")
+        raise ValueError(f"Offset values must be non-negative integers. Got: {offset}")
 
-    mask_array = np.full(
-        (masks.shape[0], resolution_wh[1], resolution_wh[0]), False)
+    mask_array = np.full((masks.shape[0], resolution_wh[1], resolution_wh[0]), False)
     mask_array[
         :,
-        offset[1]: masks.shape[1] + offset[1],
-        offset[0]: masks.shape[2] + offset[0],
+        offset[1] : masks.shape[1] + offset[1],
+        offset[0] : masks.shape[2] + offset[0],
     ] = masks
 
     return mask_array
@@ -816,10 +804,8 @@ def sum_over_mask(indices: np.ndarray, axis: tuple) -> np.ndarray:
         return np.tensordot(masks, indices, axes=axis)
 
     aggregation_axis = ([1, 2], [0, 1])
-    centroid_x = sum_over_mask(
-        horizontal_indices, aggregation_axis) / total_pixels
-    centroid_y = sum_over_mask(
-        vertical_indices, aggregation_axis) / total_pixels
+    centroid_x = sum_over_mask(horizontal_indices, aggregation_axis) / total_pixels
+    centroid_y = sum_over_mask(vertical_indices, aggregation_axis) / total_pixels
 
     return np.column_stack((centroid_x, centroid_y)).astype(int)
 
@@ -897,8 +883,7 @@ def merge_data(
             elif ndim > 1:
                 merged_data[key] = np.vstack(merged_data[key])
             else:
-                raise ValueError(
-                    f"Unexpected array dimension for key '{key}'.")
+                raise ValueError(f"Unexpected array dimension for key '{key}'.")
         else:
             raise ValueError(
                 f"Inconsistent data types for key '{key}'. Only np.ndarray and list "
@@ -943,7 +928,6 @@ def get_data_item(
             else:
                 raise TypeError(f"Unsupported index type: {type(index)}")
         else:
-            raise TypeError(
-                f"Unsupported data type for key '{key}': {type(value)}")
+            raise TypeError(f"Unsupported data type for key '{key}': {type(value)}")
 
     return subset_data
diff --git a/test/detection/test_core.py b/test/detection/test_core.py
index bef511e53..dc58c9e8c 100644
--- a/test/detection/test_core.py
+++ b/test/detection/test_core.py
@@ -193,8 +193,7 @@
             DoesNotRaise(),
         ),  # take only first detection by index slice (1, 3)
         (DETECTIONS, 10, None, pytest.raises(IndexError)),  # index out of range
-        (DETECTIONS, [0, 2, 10], None, pytest.raises(
-            IndexError)),  # index out of range
+        (DETECTIONS, [0, 2, 10], None, pytest.raises(IndexError)),  # index out of range
         (DETECTIONS, np.array([0, 2, 10]), None, pytest.raises(IndexError)),
         (
             DETECTIONS,
@@ -550,7 +549,7 @@ def test_equal(
             None,
             pytest.raises(ValueError),
         ),  # confidence: None + [x]
-            (
+        (
             mock_detections(
                 xyxy=[[0, 0, 20, 20]],
                 mask=[np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=bool)],
@@ -563,10 +562,7 @@ def test_equal(
             pytest.raises(ValueError),
         ),  # mask: None + [x]
         (
-            mock_detections(
-                xyxy=[[0, 0, 20, 20]],
-                tracker_id=[1]
-            ),
+            mock_detections(xyxy=[[0, 0, 20, 20]], tracker_id=[1]),
             mock_detections(
                 xyxy=[[10, 10, 30, 30]],
                 tracker_id=None,
@@ -575,17 +571,14 @@ def test_equal(
             pytest.raises(ValueError),
         ),  # tracker_id: None + []
         (
-            mock_detections(
-                xyxy=[[0, 0, 20, 20]],
-                class_id=[1]
-            ),
+            mock_detections(xyxy=[[0, 0, 20, 20]], class_id=[1]),
             mock_detections(
                 xyxy=[[10, 10, 30, 30]],
                 class_id=None,
             ),
             None,
             pytest.raises(ValueError),
-        )  # class_id: None + []
+        ),  # class_id: None + []
     ],
 )
 def test_merge_inner_detection_object_pair(
diff --git a/test/detection/test_utils.py b/test/detection/test_utils.py
index cb7537e19..9a1fa8c93 100644
--- a/test/detection/test_utils.py
+++ b/test/detection/test_utils.py
@@ -666,8 +666,7 @@ def test_filter_polygons_by_area(
                 "image": {"width": 1000, "height": 1000},
             },
             (
-                np.array([[175.0, 275.0, 225.0, 325.0],
-                         [450.0, 450.0, 550.0, 550.0]]),
+                np.array([[175.0, 275.0, 225.0, 325.0], [450.0, 450.0, 550.0, 550.0]]),
                 np.array([0.9, 0.8]),
                 np.array([0, 7]),
                 None,
@@ -1121,10 +1120,8 @@ def test_calculate_masks_centroids(
         ),  # two data dicts with the same field name and np.array values as 2D arrays
         (
             [
-                {"test_1": np.array([1, 2, 3]),
-                 "test_2": np.array(["a", "b", "c"])},
-                {"test_1": np.array([3, 2, 1]),
-                 "test_2": np.array(["c", "b", "a"])},
+                {"test_1": np.array([1, 2, 3]), "test_2": np.array(["a", "b", "c"])},
+                {"test_1": np.array([3, 2, 1]), "test_2": np.array(["c", "b", "a"])},
             ],
             {
                 "test_1": np.array([1, 2, 3, 3, 2, 1]),
@@ -1153,10 +1150,8 @@ def test_calculate_masks_centroids(
         ),  # two data dicts with the same field name and 1D and 2D arrays values
         (
             [
-                {"test_1": np.array([1, 2, 3]),
-                 "test_2": np.array(["a", "b"])},
-                {"test_1": np.array([3, 2, 1]),
-                 "test_2": np.array(["c", "b", "a"])},
+                {"test_1": np.array([1, 2, 3]), "test_2": np.array(["a", "b"])},
+                {"test_1": np.array([3, 2, 1]), "test_2": np.array(["c", "b", "a"])},
             ],
             None,
             pytest.raises(ValueError),
@@ -1167,8 +1162,7 @@ def test_calculate_masks_centroids(
             DoesNotRaise(),
         ),  # two data dicts; one empty and one non-empty dict
         (
-            [{"test_1": [], "test_2": []}, {
-                "test_1": [1, 2, 3], "test_2": [1, 2, 3]}],
+            [{"test_1": [], "test_2": []}, {"test_1": [1, 2, 3], "test_2": [1, 2, 3]}],
             {"test_1": [1, 2, 3], "test_2": [1, 2, 3]},
             DoesNotRaise(),
         ),  # two data dicts; one empty and one non-empty dict; same keys

From 0721bc289b8f9cea901ac3e9004e2b305f618c9b Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Thu, 23 May 2024 16:21:54 +0300
Subject: [PATCH 24/26] Remove _set_at_index

---
 supervision/detection/core.py | 27 ---------------------------
 1 file changed, 27 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 6abc8dadd..069eaf09c 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -1068,33 +1068,6 @@ def __setitem__(self, key: str, value: Union[np.ndarray, List]):
 
         self.data[key] = value
 
-    def _set_at_index(self, index: int, other: Detections):
-        """
-        Set detection values (xyxy, confidence, ...) at a specified index
-        to those of another Detections object, at index 0.
-
-        Args:
-            index (int): The index in current detection, where values
-                will be set.
-            other (Detections): Detections object with exactly one element
-                to set the values from.
-
-        Raises:
-            ValueError: If `other` is not made of exactly one element.
-        """
-        if len(other) != 1:
-            raise ValueError("Detection to set from must have exactly one element.")
-
-        self.xyxy[index] = other.xyxy[0]
-        if self.mask is not None and other.mask is not None:
-            self.mask[index] = other.mask[0]
-        if self.confidence is not None and other.confidence is not None:
-            self.confidence[index] = other.confidence[0]
-        if self.class_id is not None and other.class_id is not None:
-            self.class_id[index] = other.class_id[0]
-        if self.tracker_id is not None and other.tracker_id is not None:
-            self.tracker_id[index] = other.tracker_id[0]
-
     @property
     def area(self) -> np.ndarray:
         """

From 530e1d01e152e45bd9f5bb37553f8bacbc6aeb75 Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Mon, 27 May 2024 16:17:27 +0300
Subject: [PATCH 25/26] Address comments

---
 supervision/detection/core.py | 52 ++++++++++++++---------------------
 test/detection/test_core.py   |  4 +--
 2 files changed, 23 insertions(+), 33 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 069eaf09c..f85d403d7 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -1203,7 +1203,7 @@ def with_nmm(
         result = []
         for merge_group in merge_groups:
             unmerged_detections = [self[i] for i in merge_group]
-            merged_detections = _merge_inner_detections_objects(
+            merged_detections = merge_inner_detections_objects(
                 unmerged_detections, threshold
             )
             result.append(merged_detections)
@@ -1211,7 +1211,7 @@ def with_nmm(
         return Detections.merge(result)
 
 
-def _merge_inner_detection_object_pair(
+def merge_inner_detection_object_pair(
     detections_1: Detections, detections_2: Detections
 ) -> Detections:
     """
@@ -1259,29 +1259,23 @@ def _merge_inner_detection_object_pair(
     if len(detections_1) != 1 or len(detections_2) != 1:
         raise ValueError("Both Detections should have exactly 1 detected object.")
 
-    _verify_fields_both_defined_or_none(detections_1, detections_2)
+    validate_fields_both_defined_or_none(detections_1, detections_2)
 
+    xyxy_1 = detections_1.xyxy[0]
+    xyxy_2 = detections_2.xyxy[0]
     if detections_1.confidence is None and detections_2.confidence is None:
         merged_confidence = None
     else:
-        area_det1 = (detections_1.xyxy[0][2] - detections_1.xyxy[0][0]) * (
-            detections_1.xyxy[0][3] - detections_1.xyxy[0][1]
-        )
-        area_det2 = (detections_2.xyxy[0][2] - detections_2.xyxy[0][0]) * (
-            detections_2.xyxy[0][3] - detections_2.xyxy[0][1]
-        )
+        detection_1_area = (xyxy_1[2] - xyxy_1[0]) * (xyxy_1[3] - xyxy_1[1])
+        detections_2_area = (xyxy_2[2] - xyxy_2[0]) * (xyxy_2[3] - xyxy_2[1])
         merged_confidence = (
-            area_det1 * detections_1.confidence[0]
-            + area_det2 * detections_2.confidence[0]
-        ) / (area_det1 + area_det2)
+            detection_1_area * detections_1.confidence[0]
+            + detections_2_area * detections_2.confidence[0]
+        ) / (detection_1_area + detections_2_area)
         merged_confidence = np.array([merged_confidence])
 
-    merged_x1, merged_y1 = np.minimum(
-        detections_1.xyxy[0][:2], detections_2.xyxy[0][:2]
-    )
-    merged_x2, merged_y2 = np.maximum(
-        detections_1.xyxy[0][2:], detections_2.xyxy[0][2:]
-    )
+    merged_x1, merged_y1 = np.minimum(xyxy_1[:2], xyxy_2[:2])
+    merged_x2, merged_y2 = np.maximum(xyxy_1[2:], xyxy_2[2:])
     merged_xyxy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]])
 
     if detections_1.mask is None and detections_2.mask is None:
@@ -1290,27 +1284,23 @@ def _merge_inner_detection_object_pair(
         merged_mask = np.logical_or(detections_1.mask, detections_2.mask)
 
     if detections_1.confidence is None and detections_2.confidence is None:
-        winning_det = detections_1
+        winning_detection = detections_1
     elif detections_1.confidence[0] >= detections_2.confidence[0]:
-        winning_det = detections_1
+        winning_detection = detections_1
     else:
-        winning_det = detections_2
-
-    winning_class_id = winning_det.class_id
-    winning_tracker_id = winning_det.tracker_id
-    winning_data = winning_det.data
+        winning_detection = detections_2
 
     return Detections(
         xyxy=merged_xyxy,
         mask=merged_mask,
         confidence=merged_confidence,
-        class_id=winning_class_id,
-        tracker_id=winning_tracker_id,
-        data=winning_data,
+        class_id=winning_detection.class_id,
+        tracker_id=winning_detection.tracker_id,
+        data=winning_detection.data,
     )
 
 
-def _merge_inner_detections_objects(
+def merge_inner_detections_objects(
     detections: List[Detections], threshold=0.5
 ) -> Detections:
     """
@@ -1326,11 +1316,11 @@ def _merge_inner_detections_objects(
         box_iou = box_iou_batch(detections_1.xyxy, detections_2.xyxy)[0]
         if box_iou < threshold:
             break
-        detections_1 = _merge_inner_detection_object_pair(detections_1, detections_2)
+        detections_1 = merge_inner_detection_object_pair(detections_1, detections_2)
     return detections_1
 
 
-def _verify_fields_both_defined_or_none(
+def validate_fields_both_defined_or_none(
     detections_1: Detections, detections_2: Detections
 ) -> None:
     """
diff --git a/test/detection/test_core.py b/test/detection/test_core.py
index dc58c9e8c..af1d58762 100644
--- a/test/detection/test_core.py
+++ b/test/detection/test_core.py
@@ -5,7 +5,7 @@
 import numpy as np
 import pytest
 
-from supervision.detection.core import Detections, _merge_inner_detection_object_pair
+from supervision.detection.core import Detections, merge_inner_detection_object_pair
 from supervision.geometry.core import Position
 
 PREDICTIONS = np.array(
@@ -588,5 +588,5 @@ def test_merge_inner_detection_object_pair(
     exception: Exception,
 ):
     with exception:
-        result = _merge_inner_detection_object_pair(detection_1, detection_2)
+        result = merge_inner_detection_object_pair(detection_1, detection_2)
         assert result == expected_result

From 2ee9e08446a071c50ff8acf000f80fdc0bb6c0a9 Mon Sep 17 00:00:00 2001
From: Linas Kondrackis <linas.ko+dev@skiff.com>
Date: Mon, 27 May 2024 16:21:40 +0300
Subject: [PATCH 26/26] Renamed to group_overlapping_boxes

---
 supervision/detection/utils.py | 6 +++---
 test/detection/test_utils.py   | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py
index 4beea2ed5..74726995e 100644
--- a/supervision/detection/utils.py
+++ b/supervision/detection/utils.py
@@ -275,7 +275,7 @@ def box_non_max_suppression(
     return keep[sort_index.argsort()]
 
 
-def _box_non_max_merge_all(
+def group_overlapping_boxes(
     predictions: npt.NDArray[np.float64], iou_threshold: float = 0.5
 ) -> List[List[int]]:
     """
@@ -338,13 +338,13 @@ def box_non_max_merge(
             Each group may have 1 or more elements.
     """
     if predictions.shape[1] == 5:
-        return _box_non_max_merge_all(predictions, iou_threshold)
+        return group_overlapping_boxes(predictions, iou_threshold)
 
     category_ids = predictions[:, 5]
     merge_groups = []
     for category_id in np.unique(category_ids):
         curr_indices = np.where(category_ids == category_id)[0]
-        merge_class_groups = _box_non_max_merge_all(
+        merge_class_groups = group_overlapping_boxes(
             predictions[curr_indices], iou_threshold
         )
 
diff --git a/test/detection/test_utils.py b/test/detection/test_utils.py
index 9a1fa8c93..b62faa619 100644
--- a/test/detection/test_utils.py
+++ b/test/detection/test_utils.py
@@ -6,12 +6,12 @@
 
 from supervision.config import CLASS_NAME_DATA_FIELD
 from supervision.detection.utils import (
-    _box_non_max_merge_all,
     box_non_max_suppression,
     calculate_masks_centroids,
     clip_boxes,
     filter_polygons_by_area,
     get_data_item,
+    group_overlapping_boxes,
     mask_non_max_suppression,
     merge_data,
     move_boxes,
@@ -241,14 +241,14 @@ def test_box_non_max_suppression(
         ),  # confidence
     ],
 )
-def test_box_non_max_merge(
+def test_group_overlapping_boxes(
     predictions: np.ndarray,
     iou_threshold: float,
     expected_result: List[List[int]],
     exception: Exception,
 ) -> None:
     with exception:
-        result = _box_non_max_merge_all(
+        result = group_overlapping_boxes(
             predictions=predictions, iou_threshold=iou_threshold
         )