From c78ae33e43c95e067e2ae34ff9e7616fe696cac3 Mon Sep 17 00:00:00 2001 From: mario-dg Date: Fri, 13 Oct 2023 18:24:22 +0200 Subject: [PATCH 01/26] =?UTF-8?q?feat:=20=F0=9F=9A=80=20Added=20Non-Maximu?= =?UTF-8?q?m=20Merging=20to=20Detections?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/core.py | 107 ++++++++++ .../detection/tools/inference_slicer.py | 17 +- supervision/detection/utils.py | 190 +++++++++++++++++- 3 files changed, 310 insertions(+), 4 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 77bfca9da..006bc6e7e 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -6,7 +6,15 @@ import numpy as np from supervision.detection.utils import ( + batched_greedy_nmm, + box_iou_batch, extract_ultralytics_masks, + get_merged_bbox, + get_merged_class_id, + get_merged_confidence, + get_merged_mask, + get_merged_tracker_id, + greedy_nmm, non_max_suppression, process_roboflow_result, xywh_to_xyxy, @@ -729,6 +737,105 @@ def box_area(self) -> np.ndarray: """ return (self.xyxy[:, 3] - self.xyxy[:, 1]) * (self.xyxy[:, 2] - self.xyxy[:, 0]) + def with_nmm( + self, threshold: float = 0.5, class_agnostic: bool = False + ) -> Detections: + """ + Perform non-maximum merging on the current set of object detections. + + Args: + threshold (float, optional): The intersection-over-union threshold + to use for non-maximum merging. Defaults to 0.5. + class_agnostic (bool, optional): Whether to perform class-agnostic + non-maximum merging. If True, the class_id of each detection + will be ignored. Defaults to False. + + Returns: + Detections: A new Detections object containing the subset of detections + after non-maximum merging. + + Raises: + AssertionError: If `confidence` is None and class_agnostic is False. + If `class_id` is None and class_agnostic is False. 
+ """ + if len(self) == 0: + return self + + assert ( + self.confidence is not None + ), "Detections confidence must be given for NMM to be executed." + + if class_agnostic: + predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1))) + keep_to_merge_list = greedy_nmm(predictions, threshold) + else: + predictions = np.hstack( + ( + self.xyxy, + self.confidence.reshape(-1, 1), + self.class_id.reshape(-1, 1), + ) + ) + keep_to_merge_list = batched_greedy_nmm(predictions, threshold) + + result = [] + + for keep_ind, merge_ind_list in keep_to_merge_list.items(): + for merge_ind in merge_ind_list: + if ( + box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy).item() + > threshold + ): + self[keep_ind].xyxy = np.vstack( + ( + self[keep_ind].xyxy, + get_merged_bbox(self.xyxy[keep_ind], self.xyxy[merge_ind]), + ) + ) + self[keep_ind].class_id = np.hstack( + ( + self[keep_ind].class_id, + get_merged_class_id( + self.class_id[keep_ind].item(), + self.class_id[merge_ind].item(), + ), + ) + ) + self[keep_ind].confidence = np.hstack( + ( + self[keep_ind].confidence, + get_merged_confidence( + self.confidence[keep_ind].item(), + self.confidence[merge_ind].item(), + ), + ) + ) + if self.mask is not None: + merged_mask = get_merged_mask( + self.mask[keep_ind], self.mask[merge_ind] + ) + if self[keep_ind].mask is None: + self[keep_ind].mask = np.array([merged_mask]) + else: + self[keep_ind].mask = np.vstack( + (self[keep_ind].mask, merged_mask[np.newaxis]) + ) + if self.tracker_id is not None: + merged_tracker_id = get_merged_tracker_id( + self.tracker_id[keep_ind].item(), + self.tracker_id[merge_ind].item(), + ) + if self[keep_ind].tracker_id is None: + self[keep_ind].tracker_id = np.array( + [merged_tracker_id], dtype=int + ) + else: + self[keep_ind].tracker_id = np.hstack( + (self[keep_ind].tracker_id, merged_tracker_id) + ) + result.append(self[keep_ind]) + return Detections.merge(result) + def with_nms( self, threshold: float = 0.5, class_agnostic: bool = False ) 
-> Detections: diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py index 5f6fb391d..2098c79c8 100644 --- a/supervision/detection/tools/inference_slicer.py +++ b/supervision/detection/tools/inference_slicer.py @@ -36,6 +36,10 @@ class InferenceSlicer: slices in the format `(width_ratio, height_ratio)`. iou_threshold (Optional[float]): Intersection over Union (IoU) threshold used for non-max suppression. + merge_detections (Optional[bool]): Whether to merge the detection from all + slices or simply concatenate them. If `True`, Non-Maximum Merging (NMM), + otherwise Non-Maximum Suppression (NMS), + is applied to the final detections. callback (Callable): A function that performs inference on a given image slice and returns detections. thread_workers (int): Number of threads for parallel execution. @@ -53,11 +57,13 @@ def __init__( slice_wh: Tuple[int, int] = (320, 320), overlap_ratio_wh: Tuple[float, float] = (0.2, 0.2), iou_threshold: Optional[float] = 0.5, + merge_detections: Optional[bool] = False, thread_workers: int = 1, ): self.slice_wh = slice_wh self.overlap_ratio_wh = overlap_ratio_wh self.iou_threshold = iou_threshold + self.merge_detections = merge_detections self.callback = callback self.thread_workers = thread_workers validate_inference_callback(callback=callback) @@ -109,9 +115,14 @@ def __call__(self, image: np.ndarray) -> Detections: for future in as_completed(futures): detections_list.append(future.result()) - return Detections.merge(detections_list=detections_list).with_nms( - threshold=self.iou_threshold - ) + if self.merge_detections: + return Detections.merge(detections_list=detections_list).with_nmm( + threshold=self.iou_threshold + ) + else: + return Detections.merge(detections_list=detections_list).with_nms( + threshold=self.iou_threshold + ) def _run_callback(self, image, offset) -> Detections: """ diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index 
7a5eb5469..b0414eb44 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Tuple +from typing import Dict, List, Optional, Tuple import cv2 import numpy as np @@ -110,6 +110,194 @@ def non_max_suppression( return keep[sort_index.argsort()] +def greedy_nmm(predictions: np.ndarray, threshold: float = 0.5) -> Dict[int, List[int]]: + """ + Apply greedy version of non-maximum merging to avoid detecting too many + overlapping bounding boxes for a given object. + + Args: + predictions (np.ndarray): An array of shape `(n, 5)` containing + the bounding boxes coordinates in format `[x1, y1, x2, y2]` + and the confidence scores. + threshold (float, optional): The intersection-over-union threshold + to use for non-maximum suppression. Defaults to 0.5. + + Returns: + Dict[int, List[int]]: Mapping from prediction indices + to keep to a list of prediction indices to be merged. + """ + keep_to_merge_list = {} + + x1 = predictions[:, 0] + y1 = predictions[:, 1] + x2 = predictions[:, 2] + y2 = predictions[:, 3] + + scores = predictions[:, 4] + + areas = (x2 - x1) * (y2 - y1) + + order = scores.argsort() + + keep = [] + + while len(order) > 0: + idx = order[-1] + + keep.append(idx.tolist()) + + order = order[:-1] + + if len(order) == 0: + keep_to_merge_list[idx.tolist()] = [] + break + + xx1 = np.take(x1, axis=0, indices=order) + xx2 = np.take(x2, axis=0, indices=order) + yy1 = np.take(y1, axis=0, indices=order) + yy2 = np.take(y2, axis=0, indices=order) + + xx1 = np.maximum(xx1, x1[idx]) + yy1 = np.maximum(yy1, y1[idx]) + xx2 = np.minimum(xx2, x2[idx]) + yy2 = np.minimum(yy2, y2[idx]) + + w = np.maximum(0.0, xx2 - xx1) + h = np.maximum(0.0, yy2 - yy1) + + inter = w * h + + rem_areas = np.take(areas, axis=0, indices=order) + + union = (rem_areas - inter) + areas[idx] + match_metric_value = inter / union + + mask = match_metric_value < threshold + mask = mask.astype(np.uint8) + matched_box_indices = 
np.flip(order[np.where(mask == 0)[0]]) + unmatched_indices = order[np.where(mask == 1)[0]] + + order = unmatched_indices[scores[unmatched_indices].argsort()] + + keep_to_merge_list[idx.tolist()] = [] + + for matched_box_ind in matched_box_indices.tolist(): + keep_to_merge_list[idx.tolist()].append(matched_box_ind) + + return keep_to_merge_list + + +def batched_greedy_nmm( + predictions: np.ndarray, threshold: float = 0.5 +) -> Dict[int, List[int]]: + """ + Apply greedy version of non-maximum merging per category to avoid detecting + too many overlapping bounding boxes for a given object. + + Args: + predictions (np.ndarray): An array of shape `(n, 6)` containing + the bounding boxes coordinates in format `[x1, y1, x2, y2]`, + the confidence scores and class_ids. + threshold (float, optional): The intersection-over-union threshold + to use for non-maximum suppression. Defaults to 0.5. + + Returns: + Dict[int, List[int]]: Mapping from prediction indices + to keep to a list of prediction indices to be merged. + """ + category_ids = predictions[:, 5] + keep_to_merge_list = {} + for category_id in np.unique(category_ids): + curr_indices = np.where(category_ids == category_id)[0] + curr_keep_to_merge_list = greedy_nmm(predictions[curr_indices], threshold) + curr_indices_list = curr_indices.tolist() + for curr_keep, curr_merge_list in curr_keep_to_merge_list.items(): + keep = curr_indices_list[curr_keep] + merge_list = [curr_indices_list[i] for i in curr_merge_list] + keep_to_merge_list[keep] = merge_list + return keep_to_merge_list + + +def get_merged_bbox(bbox1: np.ndarray, bbox2: np.ndarray) -> np.ndarray: + """ + Merges two bounding boxes into one. + + Args: + bbox1 (np.ndarray): A numpy array of shape `(, 4)` where the + row corresponds to a bounding box in + the format `(x_min, y_min, x_max, y_max)`. + bbox2 (np.ndarray): A numpy array of shape `(, 4)` where the + row corresponds to a bounding box in + the format `(x_min, y_min, x_max, y_max)`. 
+ + Returns: + np.ndarray: A numpy array of shape `(, 4)` where the new + bounding box is the merged bounding box of `bbox1` and `bbox2`. + """ + left_top = np.minimum(bbox1[:2], bbox2[:2]) + right_bottom = np.maximum(bbox1[2:], bbox2[2:]) + return np.concatenate([left_top, right_bottom]) + + +def get_merged_class_id(id1: int, id2: int) -> int: + """ + Merges two class ids into one. + + Args: + id1 (int): The first class id. + id2 (int): The second class id. + + Returns: + int: The merged class id. + """ + return max(id1, id2) + + +def get_merged_confidence(confidence1: float, confidence2: float) -> float: + """ + Merges two confidences into one. + + Args: + confidence1 (float): The first confidence. + confidence2 (float): The second confidence. + + Returns: + float: The merged confidence. + """ + return max(confidence1, confidence2) + + +def get_merged_mask(mask1: np.ndarray, mask2: np.ndarray) -> np.ndarray: + """ + Merges two masks into one. + + Args: + mask1 (np.ndarray): A numpy array of shape `(H, W)` where `H` and `W` + are the height and width of the mask, respectively. + mask2 (np.ndarray): A numpy array of shape `(H, W)` where `H` and `W` + are the height and width of the mask, respectively. + + Returns: + np.ndarray: A numpy array of shape `(H, W)` where the new mask is the + merged mask of `mask1` and `mask2`. + """ + return np.logical_or(mask1, mask2) + + +def get_merged_tracker_id(tracker_id1: int, tracker_id2: int) -> int: + """ + Merges two tracker ids into one. + + Args: + tracker_id1 (int): The first tracker id. + tracker_id2 (int): The second tracker id. + + Returns: + int: The merged tracker id. 
+ """ + return max(tracker_id1, tracker_id2) + + def clip_boxes( boxes_xyxy: np.ndarray, frame_resolution_wh: Tuple[int, int] ) -> np.ndarray: From 57b12e6e00069d9064df783eaac40d230c4626bd Mon Sep 17 00:00:00 2001 From: mario-dg Date: Thu, 19 Oct 2023 00:03:36 +0200 Subject: [PATCH 02/26] Added __setitem__ to Detections and refactored the object prediction merging --- supervision/detection/core.py | 104 +++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 46 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 006bc6e7e..bd729a964 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -67,6 +67,27 @@ def _validate_tracker_id(tracker_id: Any, n: int) -> None: raise ValueError("tracker_id must be None or 1d np.ndarray with (n,) shape") +def _merge_object_detection_pair(pred1: Detections, pred2: Detections) -> Detections: + merged_bbox = get_merged_bbox(pred1.xyxy, pred2.xyxy) + merged_conf = get_merged_confidence(pred1.confidence, pred2.confidence) + merged_class_id = get_merged_class_id(pred1.class_id, pred2.class_id) + merged_tracker_id = None + merged_mask = None + + if pred1.mask and pred2.mask: + merged_mask = get_merged_mask(pred1.mask, pred2.mask) + if pred1.tracker_id and pred2.tracker_id: + merged_tracker_id = get_merged_tracker_id(pred1.tracker_id, pred2.tracker_id) + + return Detections( + xyxy=merged_bbox, + mask=merged_mask, + confidence=merged_conf, + class_id=merged_class_id, + tracker_id=merged_tracker_id, + ) + + @dataclass class Detections: """ @@ -668,6 +689,38 @@ def get_anchor_coordinates(self, anchor: Position) -> np.ndarray: raise ValueError(f"{anchor} is not supported.") + def __setitem__( + self, index: Union[int, slice, List[int], np.ndarray], value: Detections + ) -> None: + """ + Set a subset of the Detections object. 
+ + Args: + index (Union[int, slice, List[int], np.ndarray]): + The index or indices of the subset of the Detections + value (Detections): The new value of the subset of the Detections + + Example: + ```python + >>> import supervision as sv + + >>> detections = sv.Detections(...) + + >>> detections[0] = sv.Detections(...) + ``` + """ + if isinstance(index, int): + index = [index] + self.xyxy[index] = value.xyxy + if self.mask is not None: + self.mask[index] = value.mask + if self.confidence is not None: + self.confidence[index] = value.confidence + if self.class_id is not None: + self.class_id[index] = value.class_id + if self.tracker_id is not None: + self.tracker_id[index] = value.tracker_id + def __getitem__( self, index: Union[int, slice, List[int], np.ndarray] ) -> Detections: @@ -761,6 +814,8 @@ def with_nmm( if len(self) == 0: return self + assert 0.0 <= threshold <= 1.0, "Threshold must be between 0 and 1." + assert ( self.confidence is not None ), "Detections confidence must be given for NMM to be executed." 
@@ -786,54 +841,11 @@ def with_nmm( box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy).item() > threshold ): - self[keep_ind].xyxy = np.vstack( - ( - self[keep_ind].xyxy, - get_merged_bbox(self.xyxy[keep_ind], self.xyxy[merge_ind]), - ) + self[keep_ind] = _merge_object_detection_pair( + self[keep_ind], self[merge_ind] ) - self[keep_ind].class_id = np.hstack( - ( - self[keep_ind].class_id, - get_merged_class_id( - self.class_id[keep_ind].item(), - self.class_id[merge_ind].item(), - ), - ) - ) - self[keep_ind].confidence = np.hstack( - ( - self[keep_ind].confidence, - get_merged_confidence( - self.confidence[keep_ind].item(), - self.confidence[merge_ind].item(), - ), - ) - ) - if self.mask is not None: - merged_mask = get_merged_mask( - self.mask[keep_ind], self.mask[merge_ind] - ) - if self[keep_ind].mask is None: - self[keep_ind].mask = np.array([merged_mask]) - else: - self[keep_ind].mask = np.vstack( - (self[keep_ind].mask, merged_mask[np.newaxis]) - ) - if self.tracker_id is not None: - merged_tracker_id = get_merged_tracker_id( - self.tracker_id[keep_ind].item(), - self.tracker_id[merge_ind].item(), - ) - if self[keep_ind].tracker_id is None: - self[keep_ind].tracker_id = np.array( - [merged_tracker_id], dtype=int - ) - else: - self[keep_ind].tracker_id = np.hstack( - (self[keep_ind].tracker_id, merged_tracker_id) - ) result.append(self[keep_ind]) + return Detections.merge(result) def with_nms( From 9f222736e129df769a9771bda12eb235795e0801 Mon Sep 17 00:00:00 2001 From: mario-dg Date: Thu, 19 Oct 2023 00:05:05 +0200 Subject: [PATCH 03/26] Added standard full image inference after sliced inference to increase large object detection accuracy --- supervision/detection/tools/inference_slicer.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py index 2098c79c8..c0a30ff66 100644 --- a/supervision/detection/tools/inference_slicer.py +++ 
b/supervision/detection/tools/inference_slicer.py @@ -38,8 +38,10 @@ class InferenceSlicer: used for non-max suppression. merge_detections (Optional[bool]): Whether to merge the detection from all slices or simply concatenate them. If `True`, Non-Maximum Merging (NMM), - otherwise Non-Maximum Suppression (NMS), - is applied to the final detections. + otherwise Non-Maximum Suppression (NMS), is applied to the detections. + perform_standard_pred (Optional[bool]): Whether to perform inference on the + whole image in addition to the slices to increase the accuracy of + large object detection. callback (Callable): A function that performs inference on a given image slice and returns detections. thread_workers (int): Number of threads for parallel execution. @@ -58,12 +60,14 @@ def __init__( overlap_ratio_wh: Tuple[float, float] = (0.2, 0.2), iou_threshold: Optional[float] = 0.5, merge_detections: Optional[bool] = False, + perform_standard_pred: Optional[bool] = False, thread_workers: int = 1, ): self.slice_wh = slice_wh self.overlap_ratio_wh = overlap_ratio_wh self.iou_threshold = iou_threshold self.merge_detections = merge_detections + self.perform_standard_pred = perform_standard_pred self.callback = callback self.thread_workers = thread_workers validate_inference_callback(callback=callback) @@ -115,6 +119,9 @@ def __call__(self, image: np.ndarray) -> Detections: for future in as_completed(futures): detections_list.append(future.result()) + if self.perform_standard_pred: + detections_list.append(self.callback(image)) + if self.merge_detections: return Detections.merge(detections_list=detections_list).with_nmm( threshold=self.iou_threshold From 6f4704625b16ba69068b3a19f6d55bc21c80c434 Mon Sep 17 00:00:00 2001 From: mario-dg Date: Thu, 19 Oct 2023 00:05:42 +0200 Subject: [PATCH 04/26] Refactored merging of Detection attributes to better work with np.ndarrays --- supervision/detection/utils.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 
deletions(-) diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index b0414eb44..a79900b4b 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -162,8 +162,8 @@ def greedy_nmm(predictions: np.ndarray, threshold: float = 0.5) -> Dict[int, Lis xx2 = np.minimum(xx2, x2[idx]) yy2 = np.minimum(yy2, y2[idx]) - w = np.maximum(0.0, xx2 - xx1) - h = np.maximum(0.0, yy2 - yy1) + w = np.maximum(0, xx2 - xx1) + h = np.maximum(0, yy2 - yy1) inter = w * h @@ -234,37 +234,39 @@ def get_merged_bbox(bbox1: np.ndarray, bbox2: np.ndarray) -> np.ndarray: np.ndarray: A numpy array of shape `(, 4)` where the new bounding box is the merged bounding box of `bbox1` and `bbox2`. """ - left_top = np.minimum(bbox1[:2], bbox2[:2]) - right_bottom = np.maximum(bbox1[2:], bbox2[2:]) - return np.concatenate([left_top, right_bottom]) + left_top = np.minimum(bbox1[0][:2], bbox2[0][:2]) + right_bottom = np.maximum(bbox1[0][2:], bbox2[0][2:]) + return np.array([np.concatenate([left_top, right_bottom])]) -def get_merged_class_id(id1: int, id2: int) -> int: +def get_merged_class_id(id1: np.ndarray, id2: np.ndarray) -> np.ndarray: """ Merges two class ids into one. Args: - id1 (int): The first class id. - id2 (int): The second class id. + id1 (np.ndarray): The first class id. + id2 (np.ndarray): The second class id. Returns: - int: The merged class id. + np.ndarray: The merged class id. """ - return max(id1, id2) + return np.array([max(id1.item(), id2.item())]) -def get_merged_confidence(confidence1: float, confidence2: float) -> float: +def get_merged_confidence( + confidence1: np.ndarray, confidence2: np.ndarray +) -> np.ndarray: """ Merges two confidences into one. Args: - confidence1 (float): The first confidence. - confidence2 (float): The second confidence. + confidence1 (np.ndarray): The first confidence. + confidence2 (np.ndarray): The second confidence. Returns: - float: The merged confidence. + np.ndarray: The merged confidence. 
""" - return max(confidence1, confidence2) + return np.array([max(confidence1.item(), confidence2.item())]) def get_merged_mask(mask1: np.ndarray, mask2: np.ndarray) -> np.ndarray: From 166a8da9a07b20852c4559624fe029fc87bc8751 Mon Sep 17 00:00:00 2001 From: mario-dg Date: Thu, 11 Apr 2024 12:22:44 +0200 Subject: [PATCH 05/26] Implement Feedback --- supervision/detection/core.py | 154 +++++++++++------- .../detection/tools/inference_slicer.py | 24 +-- supervision/detection/utils.py | 69 +------- 3 files changed, 103 insertions(+), 144 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 66387087c..a9a4ee92d 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -8,22 +8,17 @@ from supervision.config import CLASS_NAME_DATA_FIELD, ORIENTED_BOX_COORDINATES from supervision.detection.utils import ( - batched_greedy_nmm, + batch_non_max_merge, box_iou_batch, box_non_max_suppression, calculate_masks_centroids, extract_ultralytics_masks, get_data_item, - get_merged_bbox, - get_merged_class_id, - get_merged_confidence, - get_merged_mask, - get_merged_tracker_id, - greedy_nmm, is_data_equal, mask_non_max_suppression, mask_to_xyxy, merge_data, + non_max_merge, process_roboflow_result, validate_detections_fields, xywh_to_xyxy, @@ -32,17 +27,57 @@ from supervision.utils.internal import deprecated -def _merge_object_detection_pair(pred1: Detections, pred2: Detections) -> Detections: - merged_bbox = get_merged_bbox(pred1.xyxy, pred2.xyxy) - merged_conf = get_merged_confidence(pred1.confidence, pred2.confidence) - merged_class_id = get_merged_class_id(pred1.class_id, pred2.class_id) +def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections: + """ + Merges two Detections object into a single Detections object. + + A `winning` detection is determined based on the confidence score of the two + input detections. 
This winning detection is then used to specify which `class_id`, + `tracker_id`, and `data` to include in the merged Detections object. + The resulting `confidence` of the merged object is calculated by the weighted + contribution of each detection to the merged object. + The bounding boxes and masks of the two input detections are merged into a single + bounding box and mask, respectively. + + Args: + det1 (Detections): + The first Detections object + det2 (Detections): + The second Detections object + + Returns: + Detections: A new Detections object, with merged attributes. + """ + assert ( + len(det1) == len(det2) == 1 + ), "Both Detections should have exactly 1 detected object." + winning_det = det1 if det1.confidence.item() > det2.confidence.item() else det2 + + area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * ( + det1.xyxy[0][3] - det1.xyxy[0][1] + ) + area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * ( + det2.xyxy[0][3] - det2.xyxy[0][1] + ) + merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2]) + merged_x2, merged_y2 = np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:]) + merged_area = (merged_x2 - merged_x1) * (merged_y2 - merged_y1) + + merged_conf = ( + area_det1 * det1.confidence.item() + area_det2 * det2.confidence.item() + ) / merged_area + merged_bbox = [np.concatenate([merged_x1, merged_y1, merged_x2, merged_y2])] + merged_class_id = winning_det.class_id.item() merged_tracker_id = None merged_mask = None + merged_data = None - if pred1.mask and pred2.mask: - merged_mask = get_merged_mask(pred1.mask, pred2.mask) - if pred1.tracker_id and pred2.tracker_id: - merged_tracker_id = get_merged_tracker_id(pred1.tracker_id, pred2.tracker_id) + if det1.mask and det2.mask: + merged_mask = np.logical_or(det1.mask, det2.mask) + if det1.tracker_id and det2.tracker_id: + merged_tracker_id = winning_det.tracker_id.item() + if det1.data and det2.data: + merged_data = winning_det.data return Detections( xyxy=merged_bbox, @@ -50,6 +85,7 @@ def 
_merge_object_detection_pair(pred1: Detections, pred2: Detections) -> Detect confidence=merged_conf, class_id=merged_class_id, tracker_id=merged_tracker_id, + data=merged_data, ) @@ -1091,22 +1127,24 @@ def box_area(self) -> np.ndarray: """ return (self.xyxy[:, 3] - self.xyxy[:, 1]) * (self.xyxy[:, 2] - self.xyxy[:, 0]) - def with_nmm( + def with_nms( self, threshold: float = 0.5, class_agnostic: bool = False ) -> Detections: """ - Perform non-maximum merging on the current set of object detections. + Performs non-max suppression on detection set. If the detections result + from a segmentation model, the IoU mask is applied. Otherwise, box IoU is used. Args: threshold (float, optional): The intersection-over-union threshold - to use for non-maximum merging. Defaults to 0.5. + to use for non-maximum suppression. I'm the lower the value the more + restrictive the NMS becomes. Defaults to 0.5. class_agnostic (bool, optional): Whether to perform class-agnostic - non-maximum merging. If True, the class_id of each detection + non-maximum suppression. If True, the class_id of each detection will be ignored. Defaults to False. Returns: Detections: A new Detections object containing the subset of detections - after non-maximum merging. + after non-maximum suppression. Raises: AssertionError: If `confidence` is None and class_agnostic is False. @@ -1115,16 +1153,17 @@ def with_nmm( if len(self) == 0: return self - assert 0.0 <= threshold <= 1.0, "Threshold must be between 0 and 1." - assert ( self.confidence is not None - ), "Detections confidence must be given for NMM to be executed." + ), "Detections confidence must be given for NMS to be executed." if class_agnostic: predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1))) - keep_to_merge_list = greedy_nmm(predictions, threshold) else: + assert self.class_id is not None, ( + "Detections class_id must be given for NMS to be executed. If you" + " intended to perform class agnostic NMS set class_agnostic=True." 
+ ) predictions = np.hstack( ( self.xyxy, @@ -1132,41 +1171,34 @@ def with_nmm( self.class_id.reshape(-1, 1), ) ) - keep_to_merge_list = batched_greedy_nmm(predictions, threshold) - - result = [] - for keep_ind, merge_ind_list in keep_to_merge_list.items(): - for merge_ind in merge_ind_list: - if ( - box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy).item() - > threshold - ): - self[keep_ind] = _merge_object_detection_pair( - self[keep_ind], self[merge_ind] - ) - result.append(self[keep_ind]) + if self.mask is not None: + indices = mask_non_max_suppression( + predictions=predictions, masks=self.mask, iou_threshold=threshold + ) + else: + indices = box_non_max_suppression( + predictions=predictions, iou_threshold=threshold + ) - return Detections.merge(result) + return self[indices] - def with_nms( + def with_nmm( self, threshold: float = 0.5, class_agnostic: bool = False ) -> Detections: """ - Performs non-max suppression on detection set. If the detections result - from a segmentation model, the IoU mask is applied. Otherwise, box IoU is used. + Perform non-maximum merging on the current set of object detections. Args: threshold (float, optional): The intersection-over-union threshold - to use for non-maximum suppression. I'm the lower the value the more - restrictive the NMS becomes. Defaults to 0.5. + to use for non-maximum merging. Defaults to 0.5. class_agnostic (bool, optional): Whether to perform class-agnostic - non-maximum suppression. If True, the class_id of each detection + non-maximum merging. If True, the class_id of each detection will be ignored. Defaults to False. Returns: Detections: A new Detections object containing the subset of detections - after non-maximum suppression. + after non-maximum merging. Raises: AssertionError: If `confidence` is None and class_agnostic is False. @@ -1175,17 +1207,16 @@ def with_nms( if len(self) == 0: return self + assert 0.0 <= threshold <= 1.0, "Threshold must be between 0 and 1." 
+ assert ( self.confidence is not None - ), "Detections confidence must be given for NMS to be executed." + ), "Detections confidence must be given for NMM to be executed." if class_agnostic: predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1))) + keep_to_merge_list = non_max_merge(predictions, threshold) else: - assert self.class_id is not None, ( - "Detections class_id must be given for NMS to be executed. If you" - " intended to perform class agnostic NMS set class_agnostic=True." - ) predictions = np.hstack( ( self.xyxy, @@ -1193,14 +1224,19 @@ def with_nms( self.class_id.reshape(-1, 1), ) ) + keep_to_merge_list = batch_non_max_merge(predictions, threshold) - if self.mask is not None: - indices = mask_non_max_suppression( - predictions=predictions, masks=self.mask, iou_threshold=threshold - ) - else: - indices = box_non_max_suppression( - predictions=predictions, iou_threshold=threshold - ) + result = [] - return self[indices] + for keep_ind, merge_ind_list in keep_to_merge_list.items(): + for merge_ind in merge_ind_list: + if ( + box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy).item() + > threshold + ): + self[keep_ind] = _merge_object_detection_pair( + self[keep_ind], self[merge_ind] + ) + result.append(self[keep_ind]) + + return Detections.merge(result) diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py index 2aff9f6de..7157723f9 100644 --- a/supervision/detection/tools/inference_slicer.py +++ b/supervision/detection/tools/inference_slicer.py @@ -36,12 +36,6 @@ class InferenceSlicer: slices in the format `(width_ratio, height_ratio)`. iou_threshold (Optional[float]): Intersection over Union (IoU) threshold used for non-max suppression. - merge_detections (Optional[bool]): Whether to merge the detection from all - slices or simply concatenate them. If `True`, Non-Maximum Merging (NMM), - otherwise Non-Maximum Suppression (NMS), is applied to the detections. 
- perform_standard_pred (Optional[bool]): Whether to perform inference on the - whole image in addition to the slices to increase the accuracy of - large object detection. callback (Callable): A function that performs inference on a given image slice and returns detections. thread_workers (int): Number of threads for parallel execution. @@ -59,15 +53,11 @@ def __init__( slice_wh: Tuple[int, int] = (320, 320), overlap_ratio_wh: Tuple[float, float] = (0.2, 0.2), iou_threshold: Optional[float] = 0.5, - merge_detections: Optional[bool] = False, - perform_standard_pred: Optional[bool] = False, thread_workers: int = 1, ): self.slice_wh = slice_wh self.overlap_ratio_wh = overlap_ratio_wh self.iou_threshold = iou_threshold - self.merge_detections = merge_detections - self.perform_standard_pred = perform_standard_pred self.callback = callback self.thread_workers = thread_workers @@ -118,17 +108,9 @@ def callback(image_slice: np.ndarray) -> sv.Detections: for future in as_completed(futures): detections_list.append(future.result()) - if self.perform_standard_pred: - detections_list.append(self.callback(image)) - - if self.merge_detections: - return Detections.merge(detections_list=detections_list).with_nmm( - threshold=self.iou_threshold - ) - else: - return Detections.merge(detections_list=detections_list).with_nms( - threshold=self.iou_threshold - ) + return Detections.merge(detections_list=detections_list).with_nms( + threshold=self.iou_threshold + ) def _run_callback(self, image, offset) -> Detections: """ diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index b9edb9d63..9e732aeb4 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -274,7 +274,9 @@ def box_non_max_suppression( return keep[sort_index.argsort()] -def greedy_nmm(predictions: np.ndarray, threshold: float = 0.5) -> Dict[int, List[int]]: +def non_max_merge( + predictions: np.ndarray, threshold: float = 0.5 +) -> Dict[int, List[int]]: """ Apply greedy 
version of non-maximum merging to avoid detecting too many overlapping bounding boxes for a given object. @@ -351,7 +353,7 @@ def greedy_nmm(predictions: np.ndarray, threshold: float = 0.5) -> Dict[int, Lis return keep_to_merge_list -def batched_greedy_nmm( +def batch_non_max_merge( predictions: np.ndarray, threshold: float = 0.5 ) -> Dict[int, List[int]]: """ @@ -373,7 +375,7 @@ def batched_greedy_nmm( keep_to_merge_list = {} for category_id in np.unique(category_ids): curr_indices = np.where(category_ids == category_id)[0] - curr_keep_to_merge_list = greedy_nmm(predictions[curr_indices], threshold) + curr_keep_to_merge_list = non_max_merge(predictions[curr_indices], threshold) curr_indices_list = curr_indices.tolist() for curr_keep, curr_merge_list in curr_keep_to_merge_list.items(): keep = curr_indices_list[curr_keep] @@ -403,67 +405,6 @@ def get_merged_bbox(bbox1: np.ndarray, bbox2: np.ndarray) -> np.ndarray: return np.array([np.concatenate([left_top, right_bottom])]) -def get_merged_class_id(id1: np.ndarray, id2: np.ndarray) -> np.ndarray: - """ - Merges two class ids into one. - - Args: - id1 (np.ndarray): The first class id. - id2 (np.ndarray): The second class id. - - Returns: - np.ndarray: The merged class id. - """ - return np.array([max(id1.item(), id2.item())]) - - -def get_merged_confidence( - confidence1: np.ndarray, confidence2: np.ndarray -) -> np.ndarray: - """ - Merges two confidences into one. - - Args: - confidence1 (np.ndarray): The first confidence. - confidence2 (np.ndarray): The second confidence. - - Returns: - np.ndarray: The merged confidence. - """ - return np.array([max(confidence1.item(), confidence2.item())]) - - -def get_merged_mask(mask1: np.ndarray, mask2: np.ndarray) -> np.ndarray: - """ - Merges two masks into one. - - Args: - mask1 (np.ndarray): A numpy array of shape `(H, W)` where `H` and `W` - are the height and width of the mask, respectively. 
- mask2 (np.ndarray): A numpy array of shape `(H, W)` where `H` and `W` - are the height and width of the mask, respectively. - - Returns: - np.ndarray: A numpy array of shape `(H, W)` where the new mask is the - merged mask of `mask1` and `mask2`. - """ - return np.logical_or(mask1, mask2) - - -def get_merged_tracker_id(tracker_id1: int, tracker_id2: int) -> int: - """ - Merges two tracker ids into one. - - Args: - tracker_id1 (int): The first tracker id. - tracker_id2 (int): The second tracker id. - - Returns: - int: The merged tracker id. - """ - return max(tracker_id1, tracker_id2) - - def clip_boxes(xyxy: np.ndarray, resolution_wh: Tuple[int, int]) -> np.ndarray: """ Clips bounding boxes coordinates to fit within the frame resolution. From d7e52bee264fb1b3b5c47a3f27b5eb67deae86a6 Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Mon, 6 May 2024 17:20:31 +0300 Subject: [PATCH 06/26] NMM: Add None-checks, fix area normalization, style --- supervision/detection/core.py | 181 +++++++++++++++++++++++++--------- 1 file changed, 132 insertions(+), 49 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index b60e33632..3d1c135a3 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -30,14 +30,16 @@ def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections: """ Merges two Detections object into a single Detections object. + Assumes each Detections contains exactly one object. A `winning` detection is determined based on the confidence score of the two - input detections. This winning detection is then used to specify which `class_id`, - `tracker_id`, and `data` to include in the merged Detections object. + input detections. This winning detection is then used to specify which + `class_id`, `tracker_id`, and `data` to include in the merged Detections object. 
+ The resulting `confidence` of the merged object is calculated by the weighted contribution of each detection to the merged object. - The bounding boxes and masks of the two input detections are merged into a single - bounding box and mask, respectively. + The bounding boxes and masks of the two input detections are merged into a + single bounding box and mask, respectively. Args: det1 (Detections): @@ -47,11 +49,39 @@ def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detectio Returns: Detections: A new Detections object, with merged attributes. + + Raises: + ValueError: If the input Detections objects do not have exactly 1 detected + object. + + Example: + ```python + import cv2 + import supervision as sv + from inference import get_model + + image = cv2.imread() + model = get_model(model_id="yolov8s-640") + + result = model.infer(image)[0] + detections = sv.Detections.from_inference(result) + + merged_detections = merge_object_detection_pair( + detections[0], detections[1]) + ``` """ - assert ( - len(det1) == len(det2) == 1 - ), "Both Detections should have exactly 1 detected object." 
- winning_det = det1 if det1.confidence.item() > det2.confidence.item() else det2 + if len(det1) != 1 or len(det2) != 1: + raise ValueError( + "Both Detections should have exactly 1 detected object.") + + if det2.confidence is None: + winning_det = det1 + elif det1.confidence is None: + winning_det = det2 + elif det1.confidence[0] >= det2.confidence[0]: + winning_det = det1 + else: + winning_det = det2 area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * ( det1.xyxy[0][3] - det1.xyxy[0][1] @@ -59,33 +89,39 @@ def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detectio area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * ( det2.xyxy[0][3] - det2.xyxy[0][1] ) + merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2]) merged_x2, merged_y2 = np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:]) - merged_area = (merged_x2 - merged_x1) * (merged_y2 - merged_y1) - - merged_conf = ( - area_det1 * det1.confidence.item() + area_det2 * det2.confidence.item() - ) / merged_area - merged_bbox = [np.concatenate([merged_x1, merged_y1, merged_x2, merged_y2])] - merged_class_id = winning_det.class_id.item() - merged_tracker_id = None - merged_mask = None - merged_data = None - if det1.mask and det2.mask: + merged_xy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]]) + + winning_class_id = winning_det.class_id + + if det1.confidence is None or det2.confidence is None: + merged_confidence = None + else: + merged_confidence = ( + area_det1 * det1.confidence[0] + area_det2 * det2.confidence[0] + ) / (area_det1 + area_det2) + merged_confidence = np.array([merged_confidence]) + + merged_mask = None + if det1.mask is not None and det2.mask is not None: merged_mask = np.logical_or(det1.mask, det2.mask) - if det1.tracker_id and det2.tracker_id: - merged_tracker_id = winning_det.tracker_id.item() + + winning_tracker_id = winning_det.tracker_id + + winning_data = None if det1.data and det2.data: - merged_data = winning_det.data + winning_data = winning_det.data 
return Detections( - xyxy=merged_bbox, + xyxy=merged_xy, mask=merged_mask, - confidence=merged_conf, - class_id=merged_class_id, - tracker_id=merged_tracker_id, - data=merged_data, + confidence=merged_confidence, + class_id=winning_class_id, + tracker_id=winning_tracker_id, + data=winning_data, ) @@ -260,7 +296,8 @@ def from_yolov5(cls, yolov5_results) -> Detections: detections = sv.Detections.from_yolov5(result) ``` """ - yolov5_detections_predictions = yolov5_results.pred[0].cpu().cpu().numpy() + yolov5_detections_predictions = yolov5_results.pred[0].cpu( + ).cpu().numpy() return cls( xyxy=yolov5_detections_predictions[:, :4], @@ -307,7 +344,8 @@ def from_ultralytics(cls, ultralytics_results) -> Detections: if "obb" in ultralytics_results and ultralytics_results.obb is not None: class_id = ultralytics_results.obb.cls.cpu().numpy().astype(int) - class_names = np.array([ultralytics_results.names[i] for i in class_id]) + class_names = np.array( + [ultralytics_results.names[i] for i in class_id]) oriented_box_coordinates = ultralytics_results.obb.xyxyxyxy.cpu().numpy() return cls( xyxy=ultralytics_results.obb.xyxy.cpu().numpy(), @@ -323,7 +361,8 @@ def from_ultralytics(cls, ultralytics_results) -> Detections: ) class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int) - class_names = np.array([ultralytics_results.names[i] for i in class_id]) + class_names = np.array([ultralytics_results.names[i] + for i in class_id]) return cls( xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(), confidence=ultralytics_results.boxes.conf.cpu().numpy(), @@ -411,7 +450,8 @@ def from_tensorflow( return cls( xyxy=boxes, confidence=tensorflow_results["detection_scores"][0].numpy(), - class_id=tensorflow_results["detection_classes"][0].numpy().astype(int), + class_id=tensorflow_results["detection_classes"][0].numpy().astype( + int), ) @classmethod @@ -448,7 +488,8 @@ def from_deepsparse(cls, deepsparse_results) -> Detections: return cls( xyxy=np.array(deepsparse_results.boxes[0]), 
confidence=np.array(deepsparse_results.scores[0]), - class_id=np.array(deepsparse_results.labels[0]).astype(float).astype(int), + class_id=np.array(deepsparse_results.labels[0]).astype( + float).astype(int), ) @classmethod @@ -535,24 +576,29 @@ class names. If provided, the resulting Detections object will contain Class names values can be accessed using `detections["class_name"]`. """ # noqa: E501 // docs - class_ids = transformers_results["labels"].cpu().detach().numpy().astype(int) + class_ids = transformers_results["labels"].cpu( + ).detach().numpy().astype(int) data = {} if id2label is not None: - class_names = np.array([id2label[class_id] for class_id in class_ids]) + class_names = np.array([id2label[class_id] + for class_id in class_ids]) data[CLASS_NAME_DATA_FIELD] = class_names if "boxes" in transformers_results: return cls( xyxy=transformers_results["boxes"].cpu().detach().numpy(), - confidence=transformers_results["scores"].cpu().detach().numpy(), + confidence=transformers_results["scores"].cpu( + ).detach().numpy(), class_id=class_ids, data=data, ) elif "masks" in transformers_results: - masks = transformers_results["masks"].cpu().detach().numpy().astype(bool) + masks = transformers_results["masks"].cpu( + ).detach().numpy().astype(bool) return cls( xyxy=mask_to_xyxy(masks), mask=masks, - confidence=transformers_results["scores"].cpu().detach().numpy(), + confidence=transformers_results["scores"].cpu( + ).detach().numpy(), class_id=class_ids, data=data, ) @@ -595,7 +641,8 @@ class IDs, and confidences of the predictions. 
""" return cls( - xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu().numpy(), + xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu( + ).numpy(), confidence=detectron2_results["instances"].scores.cpu().numpy(), class_id=detectron2_results["instances"] .pred_classes.cpu() @@ -638,7 +685,8 @@ def from_inference(cls, roboflow_result: Union[dict, Any]) -> Detections: Class names values can be accessed using `detections["class_name"]`. """ with suppress(AttributeError): - roboflow_result = roboflow_result.dict(exclude_none=True, by_alias=True) + roboflow_result = roboflow_result.dict( + exclude_none=True, by_alias=True) xyxy, confidence, class_id, masks, trackers, data = process_roboflow_result( roboflow_result=roboflow_result ) @@ -730,7 +778,8 @@ def from_sam(cls, sam_result: List[dict]) -> Detections: ) xywh = np.array([mask["bbox"] for mask in sorted_generated_masks]) - mask = np.array([mask["segmentation"] for mask in sorted_generated_masks]) + mask = np.array([mask["segmentation"] + for mask in sorted_generated_masks]) if np.asarray(xywh).shape[0] == 0: return cls.empty() @@ -957,7 +1006,8 @@ def stack_or_none(name: str): if all(d.__getattribute__(name) is None for d in detections_list): return None if any(d.__getattribute__(name) is None for d in detections_list): - raise ValueError(f"All or none of the '{name}' fields must be None") + raise ValueError( + f"All or none of the '{name}' fields must be None") return ( np.vstack([d.__getattribute__(name) for d in detections_list]) if name == "mask" @@ -1128,6 +1178,34 @@ def __setitem__(self, key: str, value: Union[np.ndarray, List]): self.data[key] = value + def _set_at_index(self, index: int, other: Detections): + """ + Set detection values (xyxy, confidence, ...) at a specified index + to those of another Detections object, at index 0. + + Args: + index (int): The index in current detection, where values + will be set. 
+ other (Detections): Detections object with exactly one element + to set the values from. + + Raises: + ValueError: If `other` is not made of exactly one element. + """ + if len(other) != 1: + raise ValueError( + "Detection to set from must have exactly one element.") + + self.xyxy[index] = other.xyxy[0] + if self.mask is not None and other.mask is not None: + self.mask[index] = other.mask[0] + if self.confidence is not None and other.confidence is not None: + self.confidence[index] = other.confidence[0] + if self.class_id is not None and other.class_id is not None: + self.class_id[index] = other.class_id[0] + if self.tracker_id is not None and other.tracker_id is not None: + self.tracker_id[index] = other.tracker_id[0] + @property def area(self) -> np.ndarray: """ @@ -1188,7 +1266,8 @@ def with_nms( ), "Detections confidence must be given for NMS to be executed." if class_agnostic: - predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1))) + predictions = np.hstack( + (self.xyxy, self.confidence.reshape(-1, 1))) else: assert self.class_id is not None, ( "Detections class_id must be given for NMS to be executed. If you" @@ -1244,9 +1323,14 @@ def with_nmm( ), "Detections confidence must be given for NMM to be executed." if class_agnostic: - predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1))) + predictions = np.hstack( + (self.xyxy, self.confidence.reshape(-1, 1))) keep_to_merge_list = non_max_merge(predictions, threshold) else: + assert self.class_id is not None, ( + "Detections class_id must be given for NMS to be executed. If you" + " intended to perform class agnostic NMM set class_agnostic=True." 
+ ) predictions = np.hstack( ( self.xyxy, @@ -1257,16 +1341,15 @@ def with_nmm( keep_to_merge_list = batch_non_max_merge(predictions, threshold) result = [] - for keep_ind, merge_ind_list in keep_to_merge_list.items(): for merge_ind in merge_ind_list: - if ( - box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy).item() - > threshold - ): - self[keep_ind] = _merge_object_detection_pair( + box_iou = box_iou_batch( + self[keep_ind].xyxy, self[merge_ind].xyxy)[0] + if box_iou > threshold: + merged_detection = _merge_object_detection_pair( self[keep_ind], self[merge_ind] ) + self._set_at_index(keep_ind, merged_detection) result.append(self[keep_ind]) return Detections.merge(result) From bee3252110887fe941028ef696ebe0f36eae3b7e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 6 May 2024 14:22:31 +0000 Subject: [PATCH 07/26] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/core.py | 57 ++++++++++++----------------------- 1 file changed, 19 insertions(+), 38 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 3d1c135a3..fa34c158d 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -71,8 +71,7 @@ def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detectio ``` """ if len(det1) != 1 or len(det2) != 1: - raise ValueError( - "Both Detections should have exactly 1 detected object.") + raise ValueError("Both Detections should have exactly 1 detected object.") if det2.confidence is None: winning_det = det1 @@ -296,8 +295,7 @@ def from_yolov5(cls, yolov5_results) -> Detections: detections = sv.Detections.from_yolov5(result) ``` """ - yolov5_detections_predictions = yolov5_results.pred[0].cpu( - ).cpu().numpy() + yolov5_detections_predictions = 
yolov5_results.pred[0].cpu().cpu().numpy() return cls( xyxy=yolov5_detections_predictions[:, :4], @@ -344,8 +342,7 @@ def from_ultralytics(cls, ultralytics_results) -> Detections: if "obb" in ultralytics_results and ultralytics_results.obb is not None: class_id = ultralytics_results.obb.cls.cpu().numpy().astype(int) - class_names = np.array( - [ultralytics_results.names[i] for i in class_id]) + class_names = np.array([ultralytics_results.names[i] for i in class_id]) oriented_box_coordinates = ultralytics_results.obb.xyxyxyxy.cpu().numpy() return cls( xyxy=ultralytics_results.obb.xyxy.cpu().numpy(), @@ -361,8 +358,7 @@ def from_ultralytics(cls, ultralytics_results) -> Detections: ) class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int) - class_names = np.array([ultralytics_results.names[i] - for i in class_id]) + class_names = np.array([ultralytics_results.names[i] for i in class_id]) return cls( xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(), confidence=ultralytics_results.boxes.conf.cpu().numpy(), @@ -450,8 +446,7 @@ def from_tensorflow( return cls( xyxy=boxes, confidence=tensorflow_results["detection_scores"][0].numpy(), - class_id=tensorflow_results["detection_classes"][0].numpy().astype( - int), + class_id=tensorflow_results["detection_classes"][0].numpy().astype(int), ) @classmethod @@ -488,8 +483,7 @@ def from_deepsparse(cls, deepsparse_results) -> Detections: return cls( xyxy=np.array(deepsparse_results.boxes[0]), confidence=np.array(deepsparse_results.scores[0]), - class_id=np.array(deepsparse_results.labels[0]).astype( - float).astype(int), + class_id=np.array(deepsparse_results.labels[0]).astype(float).astype(int), ) @classmethod @@ -576,29 +570,24 @@ class names. If provided, the resulting Detections object will contain Class names values can be accessed using `detections["class_name"]`. 
""" # noqa: E501 // docs - class_ids = transformers_results["labels"].cpu( - ).detach().numpy().astype(int) + class_ids = transformers_results["labels"].cpu().detach().numpy().astype(int) data = {} if id2label is not None: - class_names = np.array([id2label[class_id] - for class_id in class_ids]) + class_names = np.array([id2label[class_id] for class_id in class_ids]) data[CLASS_NAME_DATA_FIELD] = class_names if "boxes" in transformers_results: return cls( xyxy=transformers_results["boxes"].cpu().detach().numpy(), - confidence=transformers_results["scores"].cpu( - ).detach().numpy(), + confidence=transformers_results["scores"].cpu().detach().numpy(), class_id=class_ids, data=data, ) elif "masks" in transformers_results: - masks = transformers_results["masks"].cpu( - ).detach().numpy().astype(bool) + masks = transformers_results["masks"].cpu().detach().numpy().astype(bool) return cls( xyxy=mask_to_xyxy(masks), mask=masks, - confidence=transformers_results["scores"].cpu( - ).detach().numpy(), + confidence=transformers_results["scores"].cpu().detach().numpy(), class_id=class_ids, data=data, ) @@ -641,8 +630,7 @@ class IDs, and confidences of the predictions. """ return cls( - xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu( - ).numpy(), + xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu().numpy(), confidence=detectron2_results["instances"].scores.cpu().numpy(), class_id=detectron2_results["instances"] .pred_classes.cpu() @@ -685,8 +673,7 @@ def from_inference(cls, roboflow_result: Union[dict, Any]) -> Detections: Class names values can be accessed using `detections["class_name"]`. 
""" with suppress(AttributeError): - roboflow_result = roboflow_result.dict( - exclude_none=True, by_alias=True) + roboflow_result = roboflow_result.dict(exclude_none=True, by_alias=True) xyxy, confidence, class_id, masks, trackers, data = process_roboflow_result( roboflow_result=roboflow_result ) @@ -778,8 +765,7 @@ def from_sam(cls, sam_result: List[dict]) -> Detections: ) xywh = np.array([mask["bbox"] for mask in sorted_generated_masks]) - mask = np.array([mask["segmentation"] - for mask in sorted_generated_masks]) + mask = np.array([mask["segmentation"] for mask in sorted_generated_masks]) if np.asarray(xywh).shape[0] == 0: return cls.empty() @@ -1006,8 +992,7 @@ def stack_or_none(name: str): if all(d.__getattribute__(name) is None for d in detections_list): return None if any(d.__getattribute__(name) is None for d in detections_list): - raise ValueError( - f"All or none of the '{name}' fields must be None") + raise ValueError(f"All or none of the '{name}' fields must be None") return ( np.vstack([d.__getattribute__(name) for d in detections_list]) if name == "mask" @@ -1193,8 +1178,7 @@ def _set_at_index(self, index: int, other: Detections): ValueError: If `other` is not made of exactly one element. """ if len(other) != 1: - raise ValueError( - "Detection to set from must have exactly one element.") + raise ValueError("Detection to set from must have exactly one element.") self.xyxy[index] = other.xyxy[0] if self.mask is not None and other.mask is not None: @@ -1266,8 +1250,7 @@ def with_nms( ), "Detections confidence must be given for NMS to be executed." if class_agnostic: - predictions = np.hstack( - (self.xyxy, self.confidence.reshape(-1, 1))) + predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1))) else: assert self.class_id is not None, ( "Detections class_id must be given for NMS to be executed. If you" @@ -1323,8 +1306,7 @@ def with_nmm( ), "Detections confidence must be given for NMM to be executed." 
if class_agnostic: - predictions = np.hstack( - (self.xyxy, self.confidence.reshape(-1, 1))) + predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1))) keep_to_merge_list = non_max_merge(predictions, threshold) else: assert self.class_id is not None, ( @@ -1343,8 +1325,7 @@ def with_nmm( result = [] for keep_ind, merge_ind_list in keep_to_merge_list.items(): for merge_ind in merge_ind_list: - box_iou = box_iou_batch( - self[keep_ind].xyxy, self[merge_ind].xyxy)[0] + box_iou = box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy)[0] if box_iou > threshold: merged_detection = _merge_object_detection_pair( self[keep_ind], self[merge_ind] From 97c407101a2755db3288613c97cbbcda4e8105c0 Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Mon, 6 May 2024 17:24:41 +0300 Subject: [PATCH 08/26] NMM: Move detections merge into Detections class. * No other changes! --- supervision/detection/core.py | 251 ++++++++++++++++++---------------- 1 file changed, 135 insertions(+), 116 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index fa34c158d..501a27e9d 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -27,103 +27,6 @@ from supervision.validators import validate_detections_fields -def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections: - """ - Merges two Detections object into a single Detections object. - Assumes each Detections contains exactly one object. - - A `winning` detection is determined based on the confidence score of the two - input detections. This winning detection is then used to specify which - `class_id`, `tracker_id`, and `data` to include in the merged Detections object. - - The resulting `confidence` of the merged object is calculated by the weighted - contribution of each detection to the merged object. - The bounding boxes and masks of the two input detections are merged into a - single bounding box and mask, respectively. 
- - Args: - det1 (Detections): - The first Detections object - det2 (Detections): - The second Detections object - - Returns: - Detections: A new Detections object, with merged attributes. - - Raises: - ValueError: If the input Detections objects do not have exactly 1 detected - object. - - Example: - ```python - import cv2 - import supervision as sv - from inference import get_model - - image = cv2.imread() - model = get_model(model_id="yolov8s-640") - - result = model.infer(image)[0] - detections = sv.Detections.from_inference(result) - - merged_detections = merge_object_detection_pair( - detections[0], detections[1]) - ``` - """ - if len(det1) != 1 or len(det2) != 1: - raise ValueError("Both Detections should have exactly 1 detected object.") - - if det2.confidence is None: - winning_det = det1 - elif det1.confidence is None: - winning_det = det2 - elif det1.confidence[0] >= det2.confidence[0]: - winning_det = det1 - else: - winning_det = det2 - - area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * ( - det1.xyxy[0][3] - det1.xyxy[0][1] - ) - area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * ( - det2.xyxy[0][3] - det2.xyxy[0][1] - ) - - merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2]) - merged_x2, merged_y2 = np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:]) - - merged_xy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]]) - - winning_class_id = winning_det.class_id - - if det1.confidence is None or det2.confidence is None: - merged_confidence = None - else: - merged_confidence = ( - area_det1 * det1.confidence[0] + area_det2 * det2.confidence[0] - ) / (area_det1 + area_det2) - merged_confidence = np.array([merged_confidence]) - - merged_mask = None - if det1.mask is not None and det2.mask is not None: - merged_mask = np.logical_or(det1.mask, det2.mask) - - winning_tracker_id = winning_det.tracker_id - - winning_data = None - if det1.data and det2.data: - winning_data = winning_det.data - - return Detections( - xyxy=merged_xy, - 
mask=merged_mask, - confidence=merged_confidence, - class_id=winning_class_id, - tracker_id=winning_tracker_id, - data=winning_data, - ) - - @dataclass class Detections: """ @@ -295,7 +198,8 @@ def from_yolov5(cls, yolov5_results) -> Detections: detections = sv.Detections.from_yolov5(result) ``` """ - yolov5_detections_predictions = yolov5_results.pred[0].cpu().cpu().numpy() + yolov5_detections_predictions = yolov5_results.pred[0].cpu( + ).cpu().numpy() return cls( xyxy=yolov5_detections_predictions[:, :4], @@ -342,7 +246,8 @@ def from_ultralytics(cls, ultralytics_results) -> Detections: if "obb" in ultralytics_results and ultralytics_results.obb is not None: class_id = ultralytics_results.obb.cls.cpu().numpy().astype(int) - class_names = np.array([ultralytics_results.names[i] for i in class_id]) + class_names = np.array( + [ultralytics_results.names[i] for i in class_id]) oriented_box_coordinates = ultralytics_results.obb.xyxyxyxy.cpu().numpy() return cls( xyxy=ultralytics_results.obb.xyxy.cpu().numpy(), @@ -358,7 +263,8 @@ def from_ultralytics(cls, ultralytics_results) -> Detections: ) class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int) - class_names = np.array([ultralytics_results.names[i] for i in class_id]) + class_names = np.array([ultralytics_results.names[i] + for i in class_id]) return cls( xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(), confidence=ultralytics_results.boxes.conf.cpu().numpy(), @@ -446,7 +352,8 @@ def from_tensorflow( return cls( xyxy=boxes, confidence=tensorflow_results["detection_scores"][0].numpy(), - class_id=tensorflow_results["detection_classes"][0].numpy().astype(int), + class_id=tensorflow_results["detection_classes"][0].numpy().astype( + int), ) @classmethod @@ -483,7 +390,8 @@ def from_deepsparse(cls, deepsparse_results) -> Detections: return cls( xyxy=np.array(deepsparse_results.boxes[0]), confidence=np.array(deepsparse_results.scores[0]), - 
class_id=np.array(deepsparse_results.labels[0]).astype(float).astype(int), + class_id=np.array(deepsparse_results.labels[0]).astype( + float).astype(int), ) @classmethod @@ -570,24 +478,29 @@ class names. If provided, the resulting Detections object will contain Class names values can be accessed using `detections["class_name"]`. """ # noqa: E501 // docs - class_ids = transformers_results["labels"].cpu().detach().numpy().astype(int) + class_ids = transformers_results["labels"].cpu( + ).detach().numpy().astype(int) data = {} if id2label is not None: - class_names = np.array([id2label[class_id] for class_id in class_ids]) + class_names = np.array([id2label[class_id] + for class_id in class_ids]) data[CLASS_NAME_DATA_FIELD] = class_names if "boxes" in transformers_results: return cls( xyxy=transformers_results["boxes"].cpu().detach().numpy(), - confidence=transformers_results["scores"].cpu().detach().numpy(), + confidence=transformers_results["scores"].cpu( + ).detach().numpy(), class_id=class_ids, data=data, ) elif "masks" in transformers_results: - masks = transformers_results["masks"].cpu().detach().numpy().astype(bool) + masks = transformers_results["masks"].cpu( + ).detach().numpy().astype(bool) return cls( xyxy=mask_to_xyxy(masks), mask=masks, - confidence=transformers_results["scores"].cpu().detach().numpy(), + confidence=transformers_results["scores"].cpu( + ).detach().numpy(), class_id=class_ids, data=data, ) @@ -630,7 +543,8 @@ class IDs, and confidences of the predictions. """ return cls( - xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu().numpy(), + xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu( + ).numpy(), confidence=detectron2_results["instances"].scores.cpu().numpy(), class_id=detectron2_results["instances"] .pred_classes.cpu() @@ -673,7 +587,8 @@ def from_inference(cls, roboflow_result: Union[dict, Any]) -> Detections: Class names values can be accessed using `detections["class_name"]`. 
""" with suppress(AttributeError): - roboflow_result = roboflow_result.dict(exclude_none=True, by_alias=True) + roboflow_result = roboflow_result.dict( + exclude_none=True, by_alias=True) xyxy, confidence, class_id, masks, trackers, data = process_roboflow_result( roboflow_result=roboflow_result ) @@ -765,7 +680,8 @@ def from_sam(cls, sam_result: List[dict]) -> Detections: ) xywh = np.array([mask["bbox"] for mask in sorted_generated_masks]) - mask = np.array([mask["segmentation"] for mask in sorted_generated_masks]) + mask = np.array([mask["segmentation"] + for mask in sorted_generated_masks]) if np.asarray(xywh).shape[0] == 0: return cls.empty() @@ -992,7 +908,8 @@ def stack_or_none(name: str): if all(d.__getattribute__(name) is None for d in detections_list): return None if any(d.__getattribute__(name) is None for d in detections_list): - raise ValueError(f"All or none of the '{name}' fields must be None") + raise ValueError( + f"All or none of the '{name}' fields must be None") return ( np.vstack([d.__getattribute__(name) for d in detections_list]) if name == "mask" @@ -1178,7 +1095,8 @@ def _set_at_index(self, index: int, other: Detections): ValueError: If `other` is not made of exactly one element. """ if len(other) != 1: - raise ValueError("Detection to set from must have exactly one element.") + raise ValueError( + "Detection to set from must have exactly one element.") self.xyxy[index] = other.xyxy[0] if self.mask is not None and other.mask is not None: @@ -1250,7 +1168,8 @@ def with_nms( ), "Detections confidence must be given for NMS to be executed." if class_agnostic: - predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1))) + predictions = np.hstack( + (self.xyxy, self.confidence.reshape(-1, 1))) else: assert self.class_id is not None, ( "Detections class_id must be given for NMS to be executed. If you" @@ -1306,7 +1225,8 @@ def with_nmm( ), "Detections confidence must be given for NMM to be executed." 
if class_agnostic: - predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1))) + predictions = np.hstack( + (self.xyxy, self.confidence.reshape(-1, 1))) keep_to_merge_list = non_max_merge(predictions, threshold) else: assert self.class_id is not None, ( @@ -1325,12 +1245,111 @@ def with_nmm( result = [] for keep_ind, merge_ind_list in keep_to_merge_list.items(): for merge_ind in merge_ind_list: - box_iou = box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy)[0] + box_iou = box_iou_batch( + self[keep_ind].xyxy, self[merge_ind].xyxy)[0] if box_iou > threshold: - merged_detection = _merge_object_detection_pair( + merged_detection = self._merge_object_detection_pair( self[keep_ind], self[merge_ind] ) self._set_at_index(keep_ind, merged_detection) result.append(self[keep_ind]) return Detections.merge(result) + + @staticmethod + def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections: + """ + Merges two Detections object into a single Detections object. + Assumes each Detections contains exactly one object. + + A `winning` detection is determined based on the confidence score of the two + input detections. This winning detection is then used to specify which + `class_id`, `tracker_id`, and `data` to include in the merged Detections object. + + The resulting `confidence` of the merged object is calculated by the weighted + contribution of each detection to the merged object. + The bounding boxes and masks of the two input detections are merged into a + single bounding box and mask, respectively. + + Args: + det1 (Detections): + The first Detections object + det2 (Detections): + The second Detections object + + Returns: + Detections: A new Detections object, with merged attributes. + + Raises: + ValueError: If the input Detections objects do not have exactly 1 detected + object. 
+ + Example: + ```python + import cv2 + import supervision as sv + from inference import get_model + + image = cv2.imread() + model = get_model(model_id="yolov8s-640") + + result = model.infer(image)[0] + detections = sv.Detections.from_inference(result) + + merged_detections = merge_object_detection_pair( + detections[0], detections[1]) + ``` + """ + if len(det1) != 1 or len(det2) != 1: + raise ValueError( + "Both Detections should have exactly 1 detected object.") + + if det2.confidence is None: + winning_det = det1 + elif det1.confidence is None: + winning_det = det2 + elif det1.confidence[0] >= det2.confidence[0]: + winning_det = det1 + else: + winning_det = det2 + + area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * ( + det1.xyxy[0][3] - det1.xyxy[0][1] + ) + area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * ( + det2.xyxy[0][3] - det2.xyxy[0][1] + ) + + merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2]) + merged_x2, merged_y2 = np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:]) + + merged_xy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]]) + + winning_class_id = winning_det.class_id + + if det1.confidence is None or det2.confidence is None: + merged_confidence = None + else: + merged_confidence = ( + area_det1 * det1.confidence[0] + area_det2 * det2.confidence[0] + ) / (area_det1 + area_det2) + merged_confidence = np.array([merged_confidence]) + + merged_mask = None + if det1.mask is not None and det2.mask is not None: + merged_mask = np.logical_or(det1.mask, det2.mask) + + winning_tracker_id = winning_det.tracker_id + + winning_data = None + if det1.data and det2.data: + winning_data = winning_det.data + + return Detections( + xyxy=merged_xy, + mask=merged_mask, + confidence=merged_confidence, + class_id=winning_class_id, + tracker_id=winning_tracker_id, + data=winning_data, + ) From 204669b08c650378cb03553c55ec417975a4371e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: 
Mon, 6 May 2024 14:25:13 +0000 Subject: [PATCH 09/26] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/core.py | 57 ++++++++++++----------------------- 1 file changed, 19 insertions(+), 38 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 501a27e9d..beb68923d 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -198,8 +198,7 @@ def from_yolov5(cls, yolov5_results) -> Detections: detections = sv.Detections.from_yolov5(result) ``` """ - yolov5_detections_predictions = yolov5_results.pred[0].cpu( - ).cpu().numpy() + yolov5_detections_predictions = yolov5_results.pred[0].cpu().cpu().numpy() return cls( xyxy=yolov5_detections_predictions[:, :4], @@ -246,8 +245,7 @@ def from_ultralytics(cls, ultralytics_results) -> Detections: if "obb" in ultralytics_results and ultralytics_results.obb is not None: class_id = ultralytics_results.obb.cls.cpu().numpy().astype(int) - class_names = np.array( - [ultralytics_results.names[i] for i in class_id]) + class_names = np.array([ultralytics_results.names[i] for i in class_id]) oriented_box_coordinates = ultralytics_results.obb.xyxyxyxy.cpu().numpy() return cls( xyxy=ultralytics_results.obb.xyxy.cpu().numpy(), @@ -263,8 +261,7 @@ def from_ultralytics(cls, ultralytics_results) -> Detections: ) class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int) - class_names = np.array([ultralytics_results.names[i] - for i in class_id]) + class_names = np.array([ultralytics_results.names[i] for i in class_id]) return cls( xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(), confidence=ultralytics_results.boxes.conf.cpu().numpy(), @@ -352,8 +349,7 @@ def from_tensorflow( return cls( xyxy=boxes, confidence=tensorflow_results["detection_scores"][0].numpy(), - 
class_id=tensorflow_results["detection_classes"][0].numpy().astype( - int), + class_id=tensorflow_results["detection_classes"][0].numpy().astype(int), ) @classmethod @@ -390,8 +386,7 @@ def from_deepsparse(cls, deepsparse_results) -> Detections: return cls( xyxy=np.array(deepsparse_results.boxes[0]), confidence=np.array(deepsparse_results.scores[0]), - class_id=np.array(deepsparse_results.labels[0]).astype( - float).astype(int), + class_id=np.array(deepsparse_results.labels[0]).astype(float).astype(int), ) @classmethod @@ -478,29 +473,24 @@ class names. If provided, the resulting Detections object will contain Class names values can be accessed using `detections["class_name"]`. """ # noqa: E501 // docs - class_ids = transformers_results["labels"].cpu( - ).detach().numpy().astype(int) + class_ids = transformers_results["labels"].cpu().detach().numpy().astype(int) data = {} if id2label is not None: - class_names = np.array([id2label[class_id] - for class_id in class_ids]) + class_names = np.array([id2label[class_id] for class_id in class_ids]) data[CLASS_NAME_DATA_FIELD] = class_names if "boxes" in transformers_results: return cls( xyxy=transformers_results["boxes"].cpu().detach().numpy(), - confidence=transformers_results["scores"].cpu( - ).detach().numpy(), + confidence=transformers_results["scores"].cpu().detach().numpy(), class_id=class_ids, data=data, ) elif "masks" in transformers_results: - masks = transformers_results["masks"].cpu( - ).detach().numpy().astype(bool) + masks = transformers_results["masks"].cpu().detach().numpy().astype(bool) return cls( xyxy=mask_to_xyxy(masks), mask=masks, - confidence=transformers_results["scores"].cpu( - ).detach().numpy(), + confidence=transformers_results["scores"].cpu().detach().numpy(), class_id=class_ids, data=data, ) @@ -543,8 +533,7 @@ class IDs, and confidences of the predictions. 
""" return cls( - xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu( - ).numpy(), + xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu().numpy(), confidence=detectron2_results["instances"].scores.cpu().numpy(), class_id=detectron2_results["instances"] .pred_classes.cpu() @@ -587,8 +576,7 @@ def from_inference(cls, roboflow_result: Union[dict, Any]) -> Detections: Class names values can be accessed using `detections["class_name"]`. """ with suppress(AttributeError): - roboflow_result = roboflow_result.dict( - exclude_none=True, by_alias=True) + roboflow_result = roboflow_result.dict(exclude_none=True, by_alias=True) xyxy, confidence, class_id, masks, trackers, data = process_roboflow_result( roboflow_result=roboflow_result ) @@ -680,8 +668,7 @@ def from_sam(cls, sam_result: List[dict]) -> Detections: ) xywh = np.array([mask["bbox"] for mask in sorted_generated_masks]) - mask = np.array([mask["segmentation"] - for mask in sorted_generated_masks]) + mask = np.array([mask["segmentation"] for mask in sorted_generated_masks]) if np.asarray(xywh).shape[0] == 0: return cls.empty() @@ -908,8 +895,7 @@ def stack_or_none(name: str): if all(d.__getattribute__(name) is None for d in detections_list): return None if any(d.__getattribute__(name) is None for d in detections_list): - raise ValueError( - f"All or none of the '{name}' fields must be None") + raise ValueError(f"All or none of the '{name}' fields must be None") return ( np.vstack([d.__getattribute__(name) for d in detections_list]) if name == "mask" @@ -1095,8 +1081,7 @@ def _set_at_index(self, index: int, other: Detections): ValueError: If `other` is not made of exactly one element. 
""" if len(other) != 1: - raise ValueError( - "Detection to set from must have exactly one element.") + raise ValueError("Detection to set from must have exactly one element.") self.xyxy[index] = other.xyxy[0] if self.mask is not None and other.mask is not None: @@ -1168,8 +1153,7 @@ def with_nms( ), "Detections confidence must be given for NMS to be executed." if class_agnostic: - predictions = np.hstack( - (self.xyxy, self.confidence.reshape(-1, 1))) + predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1))) else: assert self.class_id is not None, ( "Detections class_id must be given for NMS to be executed. If you" @@ -1225,8 +1209,7 @@ def with_nmm( ), "Detections confidence must be given for NMM to be executed." if class_agnostic: - predictions = np.hstack( - (self.xyxy, self.confidence.reshape(-1, 1))) + predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1))) keep_to_merge_list = non_max_merge(predictions, threshold) else: assert self.class_id is not None, ( @@ -1245,8 +1228,7 @@ def with_nmm( result = [] for keep_ind, merge_ind_list in keep_to_merge_list.items(): for merge_ind in merge_ind_list: - box_iou = box_iou_batch( - self[keep_ind].xyxy, self[merge_ind].xyxy)[0] + box_iou = box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy)[0] if box_iou > threshold: merged_detection = self._merge_object_detection_pair( self[keep_ind], self[merge_ind] @@ -1301,8 +1283,7 @@ def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detectio ``` """ if len(det1) != 1 or len(det2) != 1: - raise ValueError( - "Both Detections should have exactly 1 detected object.") + raise ValueError("Both Detections should have exactly 1 detected object.") if det2.confidence is None: winning_det = det1 From c3b77d05c09f4a0192fb48aa95ab6ef701c557ed Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Tue, 14 May 2024 17:12:19 +0300 Subject: [PATCH 10/26] Rename, remove functions, unit-test & change `merge_object_detection_pair` --- 
supervision/detection/core.py | 176 ++++++++++++++++----------------- supervision/detection/utils.py | 29 +----- test/detection/test_core.py | 129 +++++++++++++++++++++++- 3 files changed, 219 insertions(+), 115 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index d56ba5160..0777571fc 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -8,8 +8,9 @@ from supervision.config import CLASS_NAME_DATA_FIELD, ORIENTED_BOX_COORDINATES from supervision.detection.utils import ( - batch_non_max_merge, + box_batch_non_max_merge, box_iou_batch, + box_non_max_merge, box_non_max_suppression, calculate_masks_centroids, extract_ultralytics_masks, @@ -18,7 +19,6 @@ mask_non_max_suppression, mask_to_xyxy, merge_data, - non_max_merge, process_roboflow_result, xywh_to_xyxy, ) @@ -1213,7 +1213,7 @@ def with_nmm( if class_agnostic: predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1))) - keep_to_merge_list = non_max_merge(predictions, threshold) + keep_to_merge_list = box_non_max_merge(predictions, threshold) else: assert self.class_id is not None, ( "Detections class_id must be given for NMS to be executed. 
If you" @@ -1226,14 +1226,14 @@ def with_nmm( self.class_id.reshape(-1, 1), ) ) - keep_to_merge_list = batch_non_max_merge(predictions, threshold) + keep_to_merge_list = box_batch_non_max_merge(predictions, threshold) result = [] for keep_ind, merge_ind_list in keep_to_merge_list.items(): for merge_ind in merge_ind_list: box_iou = box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy)[0] if box_iou > threshold: - merged_detection = self._merge_object_detection_pair( + merged_detection = self.merge_object_detection_pair( self[keep_ind], self[merge_ind] ) self._set_at_index(keep_ind, merged_detection) @@ -1241,99 +1241,95 @@ def with_nmm( return Detections.merge(result) - @staticmethod - def _merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections: - """ - Merges two Detections object into a single Detections object. - Assumes each Detections contains exactly one object. - - A `winning` detection is determined based on the confidence score of the two - input detections. This winning detection is then used to specify which - `class_id`, `tracker_id`, and `data` to include in the merged Detections object. - - The resulting `confidence` of the merged object is calculated by the weighted - contribution of each detection to the merged object. - The bounding boxes and masks of the two input detections are merged into a - single bounding box and mask, respectively. - - Args: - det1 (Detections): - The first Detections object - det2 (Detections): - The second Detections object - - Returns: - Detections: A new Detections object, with merged attributes. - - Raises: - ValueError: If the input Detections objects do not have exactly 1 detected - object. 
- - Example: - ```python - import cv2 - import supervision as sv - from inference import get_model - image = cv2.imread() - model = get_model(model_id="yolov8s-640") - - result = model.infer(image)[0] - detections = sv.Detections.from_inference(result) +def merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections: + """ + Merges two Detections object into a single Detections object. + Assumes each Detections contains exactly one object. - merged_detections = merge_object_detection_pair( - detections[0], detections[1]) - ``` - """ - if len(det1) != 1 or len(det2) != 1: - raise ValueError("Both Detections should have exactly 1 detected object.") - - if det2.confidence is None: - winning_det = det1 - elif det1.confidence is None: - winning_det = det2 - elif det1.confidence[0] >= det2.confidence[0]: - winning_det = det1 - else: - winning_det = det2 + A `winning` detection is determined based on the confidence score of the two + input detections. This winning detection is then used to specify which + `class_id`, `tracker_id`, and `data` to include in the merged Detections object. - area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * ( - det1.xyxy[0][3] - det1.xyxy[0][1] - ) - area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * ( - det2.xyxy[0][3] - det2.xyxy[0][1] - ) + The resulting `confidence` of the merged object is calculated by the weighted + contribution of ea detection to the merged object. + The bounding boxes and masks of the two input detections are merged into a + single bounding box and mask, respectively. - merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2]) - merged_x2, merged_y2 = np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:]) + Args: + det1 (Detections): + The first Detections object + det2 (Detections): + The second Detections object - merged_xy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]]) + Returns: + Detections: A new Detections object, with merged attributes. 
- winning_class_id = winning_det.class_id + Raises: + ValueError: If the input Detections objects do not have exactly 1 detected + object. - if det1.confidence is None or det2.confidence is None: - merged_confidence = None - else: - merged_confidence = ( - area_det1 * det1.confidence[0] + area_det2 * det2.confidence[0] - ) / (area_det1 + area_det2) - merged_confidence = np.array([merged_confidence]) + Example: + ```python + import cv2 + import supervision as sv + from inference import get_model - merged_mask = None - if det1.mask is not None and det2.mask is not None: - merged_mask = np.logical_or(det1.mask, det2.mask) + image = cv2.imread() + model = get_model(model_id="yolov8s-640") - winning_tracker_id = winning_det.tracker_id + result = model.infer(image)[0] + detections = sv.Detections.from_inference(result) - winning_data = None - if det1.data and det2.data: - winning_data = winning_det.data + merged_detections = merge_object_detection_pair( + detections[0], detections[1]) + ``` + """ + if len(det1) != 1 or len(det2) != 1: + raise ValueError("Both Detections should have exactly 1 detected object.") + + if det2.confidence is None: + winning_det = det1 + elif det1.confidence is None: + winning_det = det2 + elif det1.confidence[0] >= det2.confidence[0]: + winning_det = det1 + else: + winning_det = det2 + + area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * ( + det1.xyxy[0][3] - det1.xyxy[0][1] + ) + area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * ( + det2.xyxy[0][3] - det2.xyxy[0][1] + ) - return Detections( - xyxy=merged_xy, - mask=merged_mask, - confidence=merged_confidence, - class_id=winning_class_id, - tracker_id=winning_tracker_id, - data=winning_data, - ) + merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2]) + merged_x2, merged_y2 = np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:]) + merged_xy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]]) + + if det2.mask is None or det1.mask is None: + merged_mask = winning_det.mask + else: + 
merged_mask = np.logical_or(det1.mask, det2.mask) + + if det1.confidence is None or det2.confidence is None: + merged_confidence = winning_det.confidence + else: + merged_confidence = ( + area_det1 * det1.confidence[0] + area_det2 * det2.confidence[0] + ) / (area_det1 + area_det2) + merged_confidence = np.array([merged_confidence]) + + winning_class_id = winning_det.class_id + winning_tracker_id = winning_det.tracker_id + winning_data = winning_det.data + + return Detections( + xyxy=merged_xy, + mask=merged_mask, + confidence=merged_confidence, + class_id=winning_class_id, + tracker_id=winning_tracker_id, + data=winning_data, + ) diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index d2e403a49..bd20ab37d 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -274,7 +274,7 @@ def box_non_max_suppression( return keep[sort_index.argsort()] -def non_max_merge( +def box_non_max_merge( predictions: np.ndarray, threshold: float = 0.5 ) -> Dict[int, List[int]]: """ @@ -353,7 +353,7 @@ def non_max_merge( return keep_to_merge_list -def batch_non_max_merge( +def box_batch_non_max_merge( predictions: np.ndarray, threshold: float = 0.5 ) -> Dict[int, List[int]]: """ @@ -375,7 +375,9 @@ def batch_non_max_merge( keep_to_merge_list = {} for category_id in np.unique(category_ids): curr_indices = np.where(category_ids == category_id)[0] - curr_keep_to_merge_list = non_max_merge(predictions[curr_indices], threshold) + curr_keep_to_merge_list = box_non_max_merge( + predictions[curr_indices], threshold + ) curr_indices_list = curr_indices.tolist() for curr_keep, curr_merge_list in curr_keep_to_merge_list.items(): keep = curr_indices_list[curr_keep] @@ -384,27 +386,6 @@ def batch_non_max_merge( return keep_to_merge_list -def get_merged_bbox(bbox1: np.ndarray, bbox2: np.ndarray) -> np.ndarray: - """ - Merges two bounding boxes into one. 
- - Args: - bbox1 (np.ndarray): A numpy array of shape `(, 4)` where the - row corresponds to a bounding box in - the format `(x_min, y_min, x_max, y_max)`. - bbox2 (np.ndarray): A numpy array of shape `(, 4)` where the - row corresponds to a bounding box in - the format `(x_min, y_min, x_max, y_max)`. - - Returns: - np.ndarray: A numpy array of shape `(, 4)` where the new - bounding box is the merged bounding box of `bbox1` and `bbox2`. - """ - left_top = np.minimum(bbox1[0][:2], bbox2[0][:2]) - right_bottom = np.maximum(bbox1[0][2:], bbox2[0][2:]) - return np.array([np.concatenate([left_top, right_bottom])]) - - def clip_boxes(xyxy: np.ndarray, resolution_wh: Tuple[int, int]) -> np.ndarray: """ Clips bounding boxes coordinates to fit within the frame resolution. diff --git a/test/detection/test_core.py b/test/detection/test_core.py index 12f3de281..31e56decd 100644 --- a/test/detection/test_core.py +++ b/test/detection/test_core.py @@ -5,7 +5,7 @@ import numpy as np import pytest -from supervision.detection.core import Detections +from supervision.detection.core import Detections, merge_object_detection_pair from supervision.geometry.core import Position PREDICTIONS = np.array( @@ -421,3 +421,130 @@ def test_equal( detections_a: Detections, detections_b: Detections, expected_result: bool ) -> None: assert (detections_a == detections_b) == expected_result + + +@pytest.mark.parametrize( + "detection_1, detection_2, expected_result, exception", + [ + ( + mock_detections( + xyxy=[[10, 10, 30, 30]], + ), + mock_detections( + xyxy=[[10, 10, 30, 30]], + ), + mock_detections( + xyxy=[[10, 10, 30, 30]], + ), + DoesNotRaise(), + ), # Merge with self + ( + mock_detections( + xyxy=[[10, 10, 30, 30]], + ), + Detections.empty(), + None, + pytest.raises(ValueError), + ), # merge with empty: error + ( + mock_detections( + xyxy=[[10, 10, 30, 30]], + ), + mock_detections( + xyxy=[[10, 10, 30, 30], [40, 40, 60, 60]], + ), + None, + pytest.raises(ValueError), + ), # merge with 2+ 
objects: error + ( + mock_detections( + xyxy=[[10, 10, 30, 30]], + confidence=[0.1], + class_id=[1], + mask=[np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=bool)], + tracker_id=[1], + data={"key_1": [1]}, + ), + mock_detections( + xyxy=[[20, 20, 40, 40]], + confidence=[0.1], + class_id=[2], + mask=[np.array([[0, 0, 0], [0, 1, 1], [0, 1, 1]], dtype=bool)], + tracker_id=[2], + data={"key_2": [2]}, + ), + mock_detections( + xyxy=[[10, 10, 40, 40]], + confidence=[0.1], + class_id=[1], + mask=[np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]], dtype=bool)], + tracker_id=[1], + data={"key_1": [1]}, + ), + DoesNotRaise(), + ), # Same confidence - merge box & mask, tiebreak to detection_1 + ( + mock_detections( + xyxy=[[0, 0, 20, 20]], + confidence=[0.1], + class_id=[1], + mask=[np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=bool)], + tracker_id=[1], + data={"key_1": [1]}, + ), + mock_detections( + xyxy=[[10, 10, 50, 50]], + confidence=[0.2], + class_id=[2], + mask=[np.array([[0, 0, 0], [0, 1, 1], [0, 1, 1]], dtype=bool)], + tracker_id=[2], + data={"key_2": [2]}, + ), + mock_detections( + xyxy=[[0, 0, 50, 50]], + confidence=[(1 * 0.1 + 4 * 0.2) / 5], + class_id=[2], + mask=[np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]], dtype=bool)], + tracker_id=[2], + data={"key_2": [2]}, + ), + DoesNotRaise(), + ), # Different confidence, different area + ( + mock_detections( + xyxy=[[0, 0, 20, 20]], + confidence=None, + class_id=[1], + mask=[np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=bool)], + tracker_id=[1], + data={"key_1": [1]}, + ), + mock_detections( + xyxy=[[10, 10, 30, 30]], + confidence=[0.2], + class_id=[2], + mask=[np.array([[0, 0, 0], [0, 1, 1], [0, 1, 1]], dtype=bool)], + tracker_id=[2], + data={"key_2": [2]}, + ), + mock_detections( + xyxy=[[0, 0, 30, 30]], + confidence=[0.2], + class_id=[2], + mask=[np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]], dtype=bool)], + tracker_id=[2], + data={"key_2": [2]}, + ), + DoesNotRaise(), + ), # merge with no confidence + ], +) +def 
test_merge_object_detection_pair( + detection_1: Detections, + detection_2: Detections, + expected_result: Optional[Detections], + exception: Exception, +): + with exception: + result = merge_object_detection_pair(detection_1, detection_2) + assert result == expected_result From 8014e88944b9f1135448761b0c7f0832df7589ae Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Tue, 14 May 2024 17:42:47 +0300 Subject: [PATCH 11/26] Test box_non_max_merge --- supervision/detection/utils.py | 6 +- test/detection/test_utils.py | 126 +++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 3 deletions(-) diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index bd20ab37d..f177d0886 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -275,7 +275,7 @@ def box_non_max_suppression( def box_non_max_merge( - predictions: np.ndarray, threshold: float = 0.5 + predictions: np.ndarray, iou_threshold: float = 0.5 ) -> Dict[int, List[int]]: """ Apply greedy version of non-maximum merging to avoid detecting too many @@ -285,7 +285,7 @@ def box_non_max_merge( predictions (np.ndarray): An array of shape `(n, 5)` containing the bounding boxes coordinates in format `[x1, y1, x2, y2]` and the confidence scores. - threshold (float, optional): The intersection-over-union threshold + iou_threshold (float, optional): The intersection-over-union threshold to use for non-maximum suppression. Defaults to 0.5. 
Returns: @@ -338,7 +338,7 @@ def box_non_max_merge( union = (rem_areas - inter) + areas[idx] match_metric_value = inter / union - mask = match_metric_value < threshold + mask = match_metric_value < iou_threshold mask = mask.astype(np.uint8) matched_box_indices = np.flip(order[np.where(mask == 0)[0]]) unmatched_indices = order[np.where(mask == 1)[0]] diff --git a/test/detection/test_utils.py b/test/detection/test_utils.py index 097c5c6e5..e6f330841 100644 --- a/test/detection/test_utils.py +++ b/test/detection/test_utils.py @@ -6,6 +6,7 @@ from supervision.config import CLASS_NAME_DATA_FIELD from supervision.detection.utils import ( + box_non_max_merge, box_non_max_suppression, calculate_masks_centroids, clip_boxes, @@ -127,6 +128,131 @@ def test_box_non_max_suppression( assert np.array_equal(result, expected_result) +@pytest.mark.parametrize( + "predictions, iou_threshold, expected_result, exception", + [ + ( + np.empty(shape=(0, 5), dtype=float), + 0.5, + {}, + DoesNotRaise(), + ), + ( + np.array([[0, 0, 10, 10, 1.0]]), + 0.5, + {0: []}, + DoesNotRaise(), + ), + ( + np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0]]), + 0.5, + {1: [0]}, + DoesNotRaise(), + ), # High overlap, tie-break to second det + ( + np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 0.99]]), + 0.5, + {0: [1]}, + DoesNotRaise(), + ), # High overlap, merge to high confidence + ( + np.array([[0, 0, 10, 10, 0.99], [0, 0, 9, 9, 1.0]]), + 0.5, + {1: [0]}, + DoesNotRaise(), + ), # (test symmetry) High overlap, merge to high confidence + ( + np.array([[0, 0, 10, 10, 0.99], [0, 0, 9, 9, 1.0]]), + 0.5, + {1: [0]}, + DoesNotRaise(), + ), # (test symmetry) High overlap, merge to high confidence + ( + np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0]]), + 1.0, + {0: [], 1: []}, + DoesNotRaise(), + ), # High IOU required + ( + np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0]]), + 0.0, + {1: [0]}, + DoesNotRaise(), + ), # No IOU required + ( + np.array([[0, 0, 10, 10, 1.0], [0, 0, 5, 5, 0.9]]), + 0.25, + {0: [1]}, + 
DoesNotRaise(), + ), # Below IOU requirement + ( + np.array([[0, 0, 10, 10, 1.0], [0, 0, 5, 5, 0.9]]), + 0.26, + {0: [], 1: []}, + DoesNotRaise(), + ), # Above IOU requirement + ( + np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0], [0, 0, 8, 8, 1.0]]), + 0.5, + {2: [1, 0]}, + DoesNotRaise(), + ), # 3 boxes + ( + np.array( + [ + [0, 0, 10, 10, 1.0], + [0, 0, 9, 9, 1.0], + [5, 5, 10, 10, 1.0], + [6, 6, 10, 10, 1.0], + [9, 9, 10, 10, 1.0], + ] + ), + 0.5, + {1: [0], 3: [2], 4: []}, + DoesNotRaise(), + ), # 5 boxes, 2 merges, 1 separate + ( + np.array( + [ + [0, 0, 2, 1, 1.0], + [1, 0, 3, 1, 1.0], + [2, 0, 4, 1, 1.0], + [3, 0, 5, 1, 1.0], + [4, 0, 6, 1, 1.0], + ] + ), + 0.33, + {0: [], 2: [1], 4: [3]}, + DoesNotRaise(), + ), # sequential merge, half overlap + ( + np.array( + [ + [0, 0, 2, 1, 0.9], + [1, 0, 3, 1, 0.9], + [2, 0, 4, 1, 1.0], + [3, 0, 5, 1, 0.9], + [4, 0, 6, 1, 0.9], + ] + ), + 0.33, + {0: [], 2: [3, 1], 4: []}, + DoesNotRaise(), + ), # confidence + ], +) +def test_box_non_max_merge( + predictions: np.ndarray, + iou_threshold: float, + expected_result: Dict[int, List[int]], + exception: Exception, +) -> None: + with exception: + result = box_non_max_merge(predictions=predictions, iou_threshold=iou_threshold) + + assert result == expected_result + + @pytest.mark.parametrize( "predictions, masks, iou_threshold, expected_result, exception", [ From 26bafec8f732ae921fc44ac068e9ed564a067331 Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Wed, 15 May 2024 09:23:34 +0300 Subject: [PATCH 12/26] Test box_non_max_merge, rename threshold,to __init__ --- supervision/__init__.py | 4 +++- supervision/detection/core.py | 4 ++-- supervision/detection/utils.py | 8 ++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index 16de484a3..3eae2e178 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -35,7 +35,7 @@ DetectionDataset, ) from supervision.detection.annotate import BoxAnnotator 
-from supervision.detection.core import Detections +from supervision.detection.core import Detections, merge_object_detection_pair from supervision.detection.line_zone import LineZone, LineZoneAnnotator from supervision.detection.tools.csv_sink import CSVSink from supervision.detection.tools.inference_slicer import InferenceSlicer @@ -43,7 +43,9 @@ from supervision.detection.tools.polygon_zone import PolygonZone, PolygonZoneAnnotator from supervision.detection.tools.smoother import DetectionsSmoother from supervision.detection.utils import ( + batch_box_non_max_merge, box_iou_batch, + box_non_max_merge, box_non_max_suppression, calculate_masks_centroids, clip_boxes, diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 0777571fc..1b3a385de 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -8,7 +8,7 @@ from supervision.config import CLASS_NAME_DATA_FIELD, ORIENTED_BOX_COORDINATES from supervision.detection.utils import ( - box_batch_non_max_merge, + batch_box_non_max_merge, box_iou_batch, box_non_max_merge, box_non_max_suppression, @@ -1226,7 +1226,7 @@ def with_nmm( self.class_id.reshape(-1, 1), ) ) - keep_to_merge_list = box_batch_non_max_merge(predictions, threshold) + keep_to_merge_list = batch_box_non_max_merge(predictions, threshold) result = [] for keep_ind, merge_ind_list in keep_to_merge_list.items(): diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index f177d0886..c2f02c1b9 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -353,8 +353,8 @@ def box_non_max_merge( return keep_to_merge_list -def box_batch_non_max_merge( - predictions: np.ndarray, threshold: float = 0.5 +def batch_box_non_max_merge( + predictions: np.ndarray, iou_threshold: float = 0.5 ) -> Dict[int, List[int]]: """ Apply greedy version of non-maximum merging per category to avoid detecting @@ -364,7 +364,7 @@ def box_batch_non_max_merge( predictions (np.ndarray): An array 
of shape `(n, 6)` containing the bounding boxes coordinates in format `[x1, y1, x2, y2]`, the confidence scores and class_ids. - threshold (float, optional): The intersection-over-union threshold + iou_threshold (float, optional): The intersection-over-union threshold to use for non-maximum suppression. Defaults to 0.5. Returns: @@ -376,7 +376,7 @@ def box_batch_non_max_merge( for category_id in np.unique(category_ids): curr_indices = np.where(category_ids == category_id)[0] curr_keep_to_merge_list = box_non_max_merge( - predictions[curr_indices], threshold + predictions[curr_indices], iou_threshold ) curr_indices_list = curr_indices.tolist() for curr_keep, curr_merge_list in curr_keep_to_merge_list.items(): From d2d50fbe467ca3fec33e46619c63ac0548ced50b Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Wed, 15 May 2024 09:26:18 +0300 Subject: [PATCH 13/26] renamed bbox -> xyxy --- supervision/detection/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index c2f02c1b9..f6308f57a 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -458,7 +458,7 @@ def mask_to_xyxy(masks: np.ndarray) -> np.ndarray: `(x_min, y_min, x_max, y_max)` for each mask """ n = masks.shape[0] - bboxes = np.zeros((n, 4), dtype=int) + xyxy = np.zeros((n, 4), dtype=int) for i, mask in enumerate(masks): rows, cols = np.where(mask) @@ -466,9 +466,9 @@ def mask_to_xyxy(masks: np.ndarray) -> np.ndarray: if len(rows) > 0 and len(cols) > 0: x_min, x_max = np.min(cols), np.max(cols) y_min, y_max = np.min(rows), np.max(rows) - bboxes[i, :] = [x_min, y_min, x_max, y_max] + xyxy[i, :] = [x_min, y_min, x_max, y_max] - return bboxes + return xyxy def mask_to_polygons(mask: np.ndarray) -> List[np.ndarray]: From 2d740bdcb6b197f6aefe7436a718191c53884042 Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Wed, 15 May 2024 09:38:58 +0300 Subject: [PATCH 14/26] fix: 
merge_object_detection_pair --- supervision/detection/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 1b3a385de..76224bb72 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -1233,7 +1233,7 @@ def with_nmm( for merge_ind in merge_ind_list: box_iou = box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy)[0] if box_iou > threshold: - merged_detection = self.merge_object_detection_pair( + merged_detection = merge_object_detection_pair( self[keep_ind], self[merge_ind] ) self._set_at_index(keep_ind, merged_detection) From 145b5fe56c1b1daec6e8161fece90a5f23155c76 Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Wed, 15 May 2024 10:46:56 +0300 Subject: [PATCH 15/26] Rename to batch_box_non_max_merge to box_non_max_merge_batch --- supervision/__init__.py | 2 +- supervision/detection/core.py | 4 ++-- supervision/detection/utils.py | 8 +------- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index 3eae2e178..03f52086f 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -43,9 +43,9 @@ from supervision.detection.tools.polygon_zone import PolygonZone, PolygonZoneAnnotator from supervision.detection.tools.smoother import DetectionsSmoother from supervision.detection.utils import ( - batch_box_non_max_merge, box_iou_batch, box_non_max_merge, + box_non_max_merge_batch, box_non_max_suppression, calculate_masks_centroids, clip_boxes, diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 76224bb72..2489ef801 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -8,9 +8,9 @@ from supervision.config import CLASS_NAME_DATA_FIELD, ORIENTED_BOX_COORDINATES from supervision.detection.utils import ( - batch_box_non_max_merge, box_iou_batch, box_non_max_merge, + box_non_max_merge_batch, box_non_max_suppression, 
calculate_masks_centroids, extract_ultralytics_masks, @@ -1226,7 +1226,7 @@ def with_nmm( self.class_id.reshape(-1, 1), ) ) - keep_to_merge_list = batch_box_non_max_merge(predictions, threshold) + keep_to_merge_list = box_non_max_merge_batch(predictions, threshold) result = [] for keep_ind, merge_ind_list in keep_to_merge_list.items(): diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index f6308f57a..c159de596 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -300,18 +300,12 @@ def box_non_max_merge( y2 = predictions[:, 3] scores = predictions[:, 4] - areas = (x2 - x1) * (y2 - y1) order = scores.argsort() - keep = [] - while len(order) > 0: idx = order[-1] - - keep.append(idx.tolist()) - order = order[:-1] if len(order) == 0: @@ -353,7 +347,7 @@ def box_non_max_merge( return keep_to_merge_list -def batch_box_non_max_merge( +def box_non_max_merge_batch( predictions: np.ndarray, iou_threshold: float = 0.5 ) -> Dict[int, List[int]]: """ From 6c4093526607b4b37db4f2bcb05087ef53db83ad Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Wed, 15 May 2024 11:32:30 +0300 Subject: [PATCH 16/26] box_non_max_merge: use our functions to compute iou --- supervision/detection/utils.py | 35 +++++----------------------------- 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index c159de596..cb2545522 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -294,14 +294,7 @@ def box_non_max_merge( """ keep_to_merge_list = {} - x1 = predictions[:, 0] - y1 = predictions[:, 1] - x2 = predictions[:, 2] - y2 = predictions[:, 3] - scores = predictions[:, 4] - areas = (x2 - x1) * (y2 - y1) - order = scores.argsort() while len(order) > 0: @@ -312,30 +305,12 @@ def box_non_max_merge( keep_to_merge_list[idx.tolist()] = [] break - xx1 = np.take(x1, axis=0, indices=order) - xx2 = np.take(x2, axis=0, indices=order) - yy1 = 
np.take(y1, axis=0, indices=order) - yy2 = np.take(y2, axis=0, indices=order) - - xx1 = np.maximum(xx1, x1[idx]) - yy1 = np.maximum(yy1, y1[idx]) - xx2 = np.minimum(xx2, x2[idx]) - yy2 = np.minimum(yy2, y2[idx]) - - w = np.maximum(0, xx2 - xx1) - h = np.maximum(0, yy2 - yy1) - - inter = w * h - - rem_areas = np.take(areas, axis=0, indices=order) - - union = (rem_areas - inter) + areas[idx] - match_metric_value = inter / union + candidate = np.expand_dims(predictions[idx], axis=0) + ious = box_iou_batch(predictions[order][:, :4], candidate[:, :4]) - mask = match_metric_value < iou_threshold - mask = mask.astype(np.uint8) - matched_box_indices = np.flip(order[np.where(mask == 0)[0]]) - unmatched_indices = order[np.where(mask == 1)[0]] + mask = ious < iou_threshold + matched_box_indices = np.flip(order[np.where(mask is False)[0]]) + unmatched_indices = order[np.where(mask is True)[0]] order = unmatched_indices[scores[unmatched_indices].argsort()] From 53f345e91614a72b20a1f19c04d5369fa17a26ed Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Wed, 15 May 2024 11:35:59 +0300 Subject: [PATCH 17/26] Minor renaming --- supervision/detection/utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index cb2545522..7985c7391 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -299,18 +299,18 @@ def box_non_max_merge( while len(order) > 0: idx = order[-1] - order = order[:-1] + merge_candidate = np.expand_dims(predictions[idx], axis=0) + order = order[:-1] if len(order) == 0: keep_to_merge_list[idx.tolist()] = [] break - candidate = np.expand_dims(predictions[idx], axis=0) - ious = box_iou_batch(predictions[order][:, :4], candidate[:, :4]) + ious = box_iou_batch(predictions[order][:, :4], merge_candidate[:, :4]) - mask = ious < iou_threshold - matched_box_indices = np.flip(order[np.where(mask is False)[0]]) - unmatched_indices = order[np.where(mask 
is True)[0]] + below_threshold = ious < iou_threshold + matched_box_indices = np.flip(order[np.where(below_threshold is False)[0]]) + unmatched_indices = order[np.where(below_threshold is True)[0]] order = unmatched_indices[scores[unmatched_indices].argsort()] From 0e2eec08c8ed9ccc4ae21f63ca8a6f3ae658ca94 Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Wed, 15 May 2024 11:48:48 +0300 Subject: [PATCH 18/26] Revert np.bool comparisons with `is` * Ruff complains when `== True` is used * Different behaviour with `is True` --- supervision/detection/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index 7985c7391..56420ed6e 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -308,9 +308,9 @@ def box_non_max_merge( ious = box_iou_batch(predictions[order][:, :4], merge_candidate[:, :4]) - below_threshold = ious < iou_threshold - matched_box_indices = np.flip(order[np.where(below_threshold is False)[0]]) - unmatched_indices = order[np.where(below_threshold is True)[0]] + below_threshold = (ious < iou_threshold).astype(np.uint8) + matched_box_indices = np.flip(order[np.where(below_threshold == 0)[0]]) + unmatched_indices = order[np.where(below_threshold == 1)[0]] order = unmatched_indices[scores[unmatched_indices].argsort()] From 559ef90d83507994091cc7d0f76fa79ce9b7a8c1 Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Wed, 15 May 2024 11:58:15 +0300 Subject: [PATCH 19/26] Simplify box_non_max_merge --- supervision/detection/utils.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index 56420ed6e..85b741c35 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -292,7 +292,7 @@ def box_non_max_merge( Dict[int, List[int]]: Mapping from prediction indices to keep to a list of prediction indices to be merged. 
""" - keep_to_merge_list = {} + keep_to_merge_list: Dict[int, List[int]] = {} scores = predictions[:, 4] order = scores.argsort() @@ -307,17 +307,11 @@ def box_non_max_merge( break ious = box_iou_batch(predictions[order][:, :4], merge_candidate[:, :4]) + ious = ious.flatten() - below_threshold = (ious < iou_threshold).astype(np.uint8) - matched_box_indices = np.flip(order[np.where(below_threshold == 0)[0]]) - unmatched_indices = order[np.where(below_threshold == 1)[0]] - - order = unmatched_indices[scores[unmatched_indices].argsort()] - - keep_to_merge_list[idx.tolist()] = [] - - for matched_box_ind in matched_box_indices.tolist(): - keep_to_merge_list[idx.tolist()].append(matched_box_ind) + above_threshold = ious >= iou_threshold + keep_to_merge_list[idx] = np.flip(order[above_threshold]).tolist() + order = order[~above_threshold] return keep_to_merge_list From f8f3647a983529aa2e7f2bff8599d33b2a7ebe83 Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Wed, 15 May 2024 15:32:26 +0300 Subject: [PATCH 20/26] Removed surplus NMM code for 20% speedup --- supervision/detection/core.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 2489ef801..2f358c6b7 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -8,7 +8,6 @@ from supervision.config import CLASS_NAME_DATA_FIELD, ORIENTED_BOX_COORDINATES from supervision.detection.utils import ( - box_iou_batch, box_non_max_merge, box_non_max_merge_batch, box_non_max_suppression, @@ -1231,12 +1230,10 @@ def with_nmm( result = [] for keep_ind, merge_ind_list in keep_to_merge_list.items(): for merge_ind in merge_ind_list: - box_iou = box_iou_batch(self[keep_ind].xyxy, self[merge_ind].xyxy)[0] - if box_iou > threshold: - merged_detection = merge_object_detection_pair( - self[keep_ind], self[merge_ind] - ) - self._set_at_index(keep_ind, merged_detection) + merged_detection = merge_object_detection_pair( +
self[keep_ind], self[merge_ind] + ) + self._set_at_index(keep_ind, merged_detection) result.append(self[keep_ind]) return Detections.merge(result) From 9024396f6c49f5f5496dac5347859f07721e1f76 Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Fri, 17 May 2024 10:58:45 +0300 Subject: [PATCH 21/26] Add npt.NDarray[x] types, remove resolution_wh default val --- supervision/detection/utils.py | 58 +++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index 85b741c35..db33ab01d 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -3,6 +3,7 @@ import cv2 import numpy as np +import numpy.typing as npt from supervision.config import CLASS_NAME_DATA_FIELD @@ -275,14 +276,14 @@ def box_non_max_suppression( def box_non_max_merge( - predictions: np.ndarray, iou_threshold: float = 0.5 + predictions: npt.NDArray[np.float64], iou_threshold: float = 0.5 ) -> Dict[int, List[int]]: """ Apply greedy version of non-maximum merging to avoid detecting too many overlapping bounding boxes for a given object. Args: - predictions (np.ndarray): An array of shape `(n, 5)` containing + predictions (npt.NDArray[np.float64]): An array of shape `(n, 5)` containing the bounding boxes coordinates in format `[x1, y1, x2, y2]` and the confidence scores. iou_threshold (float, optional): The intersection-over-union threshold @@ -317,14 +318,14 @@ def box_non_max_merge( def box_non_max_merge_batch( - predictions: np.ndarray, iou_threshold: float = 0.5 + predictions: npt.NDArray[np.float64], iou_threshold: float = 0.5 ) -> Dict[int, List[int]]: """ Apply greedy version of non-maximum merging per category to avoid detecting too many overlapping bounding boxes for a given object. 
Args: - predictions (np.ndarray): An array of shape `(n, 6)` containing + predictions (npt.NDArray[np.float64]): An array of shape `(n, 6)` containing the bounding boxes coordinates in format `[x1, y1, x2, y2]`, the confidence scores and class_ids. iou_threshold (float, optional): The intersection-over-union threshold @@ -667,16 +668,18 @@ def process_roboflow_result( return xyxy, confidence, class_id, masks, tracker_id, data -def move_boxes(xyxy: np.ndarray, offset: np.ndarray) -> np.ndarray: +def move_boxes( + xyxy: npt.NDArray[np.float64], offset: npt.NDArray[np.int32] +) -> npt.NDArray[np.float64]: """ Parameters: - xyxy (np.ndarray): An array of shape `(n, 4)` containing the bounding boxes - coordinates in format `[x1, y1, x2, y2]` + xyxy (npt.NDArray[np.float64]): An array of shape `(n, 4)` containing the + bounding boxes coordinates in format `[x1, y1, x2, y2]` offset (np.array): An array of shape `(2,)` containing offset values in format is `[dx, dy]`. Returns: - np.ndarray: Repositioned bounding boxes. + npt.NDArray[np.float64]: Repositioned bounding boxes. Example: ```python @@ -697,24 +700,25 @@ def move_boxes(xyxy: np.ndarray, offset: np.ndarray) -> np.ndarray: def move_masks( - masks: np.ndarray, - offset: np.ndarray, - resolution_wh: Tuple[int, int] = None, -) -> np.ndarray: + masks: npt.NDArray[np.bool_], + offset: npt.NDArray[np.int32], + resolution_wh: Tuple[int, int], +) -> npt.NDArray[np.bool_]: """ Offset the masks in an array by the specified (x, y) amount. Args: - masks (np.ndarray): A 3D array of binary masks corresponding to the predictions. - Shape: `(N, H, W)`, where N is the number of predictions, and H, W are the - dimensions of each mask. - offset (np.ndarray): An array of shape `(2,)` containing non-negative int values - `[dx, dy]`. + masks (npt.NDArray[np.bool_]): A 3D array of binary masks corresponding to the + predictions. Shape: `(N, H, W)`, where N is the number of predictions, and + H, W are the dimensions of each mask. 
+ offset (npt.NDArray[np.int32]): An array of shape `(2,)` containing non-negative + int values `[dx, dy]`. resolution_wh (Tuple[int, int]): The width and height of the desired mask resolution. Returns: - (np.ndarray) repositioned masks, optionally padded to the specified shape. + (npt.NDArray[np.bool_]) repositioned masks, optionally padded to the specified + shape. """ if offset[0] < 0 or offset[1] < 0: @@ -730,19 +734,21 @@ def move_masks( return mask_array -def scale_boxes(xyxy: np.ndarray, factor: float) -> np.ndarray: +def scale_boxes( + xyxy: npt.NDArray[np.float64], factor: float +) -> npt.NDArray[np.float64]: """ Scale the dimensions of bounding boxes. Parameters: - xyxy (np.ndarray): An array of shape `(n, 4)` containing the bounding boxes - coordinates in format `[x1, y1, x2, y2]` + xyxy (npt.NDArray[np.float64]): An array of shape `(n, 4)` containing the + bounding boxes coordinates in format `[x1, y1, x2, y2]` factor (float): A float value representing the factor by which the box dimensions are scaled. A factor greater than 1 enlarges the boxes, while a factor less than 1 shrinks them. Returns: - np.ndarray: Scaled bounding boxes. + npt.NDArray[np.float64]: Scaled bounding boxes. Example: ```python @@ -810,19 +816,19 @@ def is_data_equal(data_a: Dict[str, np.ndarray], data_b: Dict[str, np.ndarray]) def merge_data( - data_list: List[Dict[str, Union[np.ndarray, List]]], -) -> Dict[str, Union[np.ndarray, List]]: + data_list: List[Dict[str, Union[npt.NDArray[np.generic], List]]], +) -> Dict[str, Union[npt.NDArray[np.generic], List]]: """ Merges the data payloads of a list of Detections instances. Args: data_list: The data payloads of the Detections instances. Each data payload is a dictionary with the same keys, and the values are either lists or - np.ndarray. + npt.NDArray[np.generic]. Returns: A single data payload containing the merged data, preserving the original data - types (list or np.ndarray). + types (list or npt.NDArray[np.generic]). 
Raises: ValueError: If data values within a single object have different lengths or if From 6fbca8333e373d06312e823e03ef8899208f1a7a Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Thu, 23 May 2024 16:01:34 +0300 Subject: [PATCH 22/26] Address review comments, simplify merge * Reintroduced iou check before response - necessary for algorithm --- supervision/__init__.py | 3 +- supervision/detection/core.py | 137 ++++++++++++++++++++++----------- supervision/detection/utils.py | 118 +++++++++++++++++----------- test/detection/test_core.py | 77 ++++++++++++++---- test/detection/test_utils.py | 56 ++++++++------ 5 files changed, 261 insertions(+), 130 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index 03f52086f..816142b90 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -35,7 +35,7 @@ DetectionDataset, ) from supervision.detection.annotate import BoxAnnotator -from supervision.detection.core import Detections, merge_object_detection_pair +from supervision.detection.core import Detections from supervision.detection.line_zone import LineZone, LineZoneAnnotator from supervision.detection.tools.csv_sink import CSVSink from supervision.detection.tools.inference_slicer import InferenceSlicer @@ -45,7 +45,6 @@ from supervision.detection.utils import ( box_iou_batch, box_non_max_merge, - box_non_max_merge_batch, box_non_max_suppression, calculate_masks_centroids, clip_boxes, diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 2f358c6b7..6abc8dadd 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -8,8 +8,8 @@ from supervision.config import CLASS_NAME_DATA_FIELD, ORIENTED_BOX_COORDINATES from supervision.detection.utils import ( + box_iou_batch, box_non_max_merge, - box_non_max_merge_batch, box_non_max_suppression, calculate_masks_centroids, extract_ultralytics_masks, @@ -1198,24 +1198,21 @@ def with_nmm( after non-maximum merging. 
Raises: - AssertionError: If `confidence` is None and class_agnostic is False. - If `class_id` is None and class_agnostic is False. + AssertionError: If `confidence` is None or `class_id` is None and + class_agnostic is False. """ if len(self) == 0: return self - assert 0.0 <= threshold <= 1.0, "Threshold must be between 0 and 1." - assert ( self.confidence is not None ), "Detections confidence must be given for NMM to be executed." if class_agnostic: predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1))) - keep_to_merge_list = box_non_max_merge(predictions, threshold) else: assert self.class_id is not None, ( - "Detections class_id must be given for NMS to be executed. If you" + "Detections class_id must be given for NMM to be executed. If you" " intended to perform class agnostic NMM set class_agnostic=True." ) predictions = np.hstack( @@ -1225,21 +1222,25 @@ def with_nmm( self.class_id.reshape(-1, 1), ) ) - keep_to_merge_list = box_non_max_merge_batch(predictions, threshold) + + merge_groups = box_non_max_merge( + predictions=predictions, iou_threshold=threshold + ) result = [] - for keep_ind, merge_ind_list in keep_to_merge_list.items(): - for merge_ind in merge_ind_list: - merged_detection = merge_object_detection_pair( - self[keep_ind], self[merge_ind] - ) - self._set_at_index(keep_ind, merged_detection) - result.append(self[keep_ind]) + for merge_group in merge_groups: + unmerged_detections = [self[i] for i in merge_group] + merged_detections = _merge_inner_detections_objects( + unmerged_detections, threshold + ) + result.append(merged_detections) return Detections.merge(result) -def merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections: +def _merge_inner_detection_object_pair( + detections_1: Detections, detections_2: Detections +) -> Detections: """ Merges two Detections object into a single Detections object. Assumes each Detections contains exactly one object. 
@@ -1254,9 +1255,9 @@ def merge_object_detection_pair(det1: Detections, det2: Detections) -> Detection single bounding box and mask, respectively. Args: - det1 (Detections): + detections_1 (Detections): The first Detections object - det2 (Detections): + detections_2 (Detections): The second Detections object Returns: @@ -1282,51 +1283,99 @@ def merge_object_detection_pair(det1: Detections, det2: Detections) -> Detection detections[0], detections[1]) ``` """ - if len(det1) != 1 or len(det2) != 1: + if len(detections_1) != 1 or len(detections_2) != 1: raise ValueError("Both Detections should have exactly 1 detected object.") - if det2.confidence is None: - winning_det = det1 - elif det1.confidence is None: - winning_det = det2 - elif det1.confidence[0] >= det2.confidence[0]: - winning_det = det1 + _verify_fields_both_defined_or_none(detections_1, detections_2) + + if detections_1.confidence is None and detections_2.confidence is None: + merged_confidence = None else: - winning_det = det2 + area_det1 = (detections_1.xyxy[0][2] - detections_1.xyxy[0][0]) * ( + detections_1.xyxy[0][3] - detections_1.xyxy[0][1] + ) + area_det2 = (detections_2.xyxy[0][2] - detections_2.xyxy[0][0]) * ( + detections_2.xyxy[0][3] - detections_2.xyxy[0][1] + ) + merged_confidence = ( + area_det1 * detections_1.confidence[0] + + area_det2 * detections_2.confidence[0] + ) / (area_det1 + area_det2) + merged_confidence = np.array([merged_confidence]) - area_det1 = (det1.xyxy[0][2] - det1.xyxy[0][0]) * ( - det1.xyxy[0][3] - det1.xyxy[0][1] + merged_x1, merged_y1 = np.minimum( + detections_1.xyxy[0][:2], detections_2.xyxy[0][:2] ) - area_det2 = (det2.xyxy[0][2] - det2.xyxy[0][0]) * ( - det2.xyxy[0][3] - det2.xyxy[0][1] + merged_x2, merged_y2 = np.maximum( + detections_1.xyxy[0][2:], detections_2.xyxy[0][2:] ) + merged_xyxy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]]) - merged_x1, merged_y1 = np.minimum(det1.xyxy[0][:2], det2.xyxy[0][:2]) - merged_x2, merged_y2 = 
np.maximum(det1.xyxy[0][2:], det2.xyxy[0][2:]) - merged_xy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]]) - - if det2.mask is None or det1.mask is None: - merged_mask = winning_det.mask + if detections_1.mask is None and detections_2.mask is None: + merged_mask = None else: - merged_mask = np.logical_or(det1.mask, det2.mask) + merged_mask = np.logical_or(detections_1.mask, detections_2.mask) - if det1.confidence is None or det2.confidence is None: - merged_confidence = winning_det.confidence + if detections_1.confidence is None and detections_2.confidence is None: + winning_det = detections_1 + elif detections_1.confidence[0] >= detections_2.confidence[0]: + winning_det = detections_1 else: - merged_confidence = ( - area_det1 * det1.confidence[0] + area_det2 * det2.confidence[0] - ) / (area_det1 + area_det2) - merged_confidence = np.array([merged_confidence]) + winning_det = detections_2 winning_class_id = winning_det.class_id winning_tracker_id = winning_det.tracker_id winning_data = winning_det.data return Detections( - xyxy=merged_xy, + xyxy=merged_xyxy, mask=merged_mask, confidence=merged_confidence, class_id=winning_class_id, tracker_id=winning_tracker_id, data=winning_data, ) + + +def _merge_inner_detections_objects( + detections: List[Detections], threshold=0.5 +) -> Detections: + """ + Given N detections each of length 1 (exactly one object inside), combine them into a + single detection object of length 1. The contained inner object will be the merged + result of all the input detections. + + For example, this lets you merge N boxes into one big box, N masks into one mask, + etc. 
+ """ + detections_1 = detections[0] + for detections_2 in detections[1:]: + box_iou = box_iou_batch(detections_1.xyxy, detections_2.xyxy)[0] + if box_iou < threshold: + break + detections_1 = _merge_inner_detection_object_pair(detections_1, detections_2) + return detections_1 + + +def _verify_fields_both_defined_or_none( + detections_1: Detections, detections_2: Detections +) -> None: + """ + Verify that for each optional field in the Detections, both instances either have + the field set to None or both have it set to non-None values. + + `data` field is ignored. + + Raises: + ValueError: If one field is None and the other is not, for any of the fields. + """ + attributes = ["mask", "confidence", "class_id", "tracker_id"] + for attribute in attributes: + value_1 = getattr(detections_1, attribute) + value_2 = getattr(detections_2, attribute) + + if (value_1 is None) != (value_2 is None): + raise ValueError( + f"Field '{attribute}' should be consistently None or not None in both " + "Detections." 
+ ) diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index db33ab01d..b8b8f7c19 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -56,7 +56,8 @@ def box_area(box): top_left = np.maximum(boxes_true[:, None, :2], boxes_detection[:, :2]) bottom_right = np.minimum(boxes_true[:, None, 2:], boxes_detection[:, 2:]) - area_inter = np.prod(np.clip(bottom_right - top_left, a_min=0, a_max=None), 2) + area_inter = np.prod( + np.clip(bottom_right - top_left, a_min=0, a_max=None), 2) return area_inter / (area_true[:, None] + area_detection - area_inter) @@ -81,7 +82,8 @@ def _mask_iou_batch_split( masks_true_area = masks_true.sum(axis=(1, 2)) masks_detection_area = masks_detection.sum(axis=(1, 2)) - union_area = masks_true_area[:, None] + masks_detection_area - intersection_area + union_area = masks_true_area[:, None] + \ + masks_detection_area - intersection_area return np.divide( intersection_area, @@ -132,7 +134,8 @@ def mask_iou_batch( 1, ) for i in range(0, masks_true.shape[0], step): - ious.append(_mask_iou_batch_split(masks_true[i : i + step], masks_detection)) + ious.append(_mask_iou_batch_split( + masks_true[i: i + step], masks_detection)) return np.vstack(ious) @@ -162,7 +165,8 @@ def resize_masks(masks: np.ndarray, max_dimension: int = 640) -> np.ndarray: resized_masks = masks[:, yv, xv] - resized_masks = resized_masks.reshape(masks.shape[0], new_height, new_width) + resized_masks = resized_masks.reshape( + masks.shape[0], new_height, new_width) return resized_masks @@ -215,8 +219,9 @@ def mask_non_max_suppression( keep = np.ones(rows, dtype=bool) for i in range(rows): if keep[i]: - condition = (ious[i] > iou_threshold) & (categories[i] == categories) - keep[i + 1 :] = np.where(condition[i + 1 :], False, keep[i + 1 :]) + condition = (ious[i] > iou_threshold) & ( + categories[i] == categories) + keep[i + 1:] = np.where(condition[i + 1:], False, keep[i + 1:]) return keep[sort_index.argsort()] @@ -275,9 +280,9 
@@ def box_non_max_suppression( return keep[sort_index.argsort()] -def box_non_max_merge( +def _box_non_max_merge_all( predictions: npt.NDArray[np.float64], iou_threshold: float = 0.5 -) -> Dict[int, List[int]]: +) -> List[List[int]]: """ Apply greedy version of non-maximum merging to avoid detecting too many overlapping bounding boxes for a given object. @@ -290,64 +295,74 @@ def box_non_max_merge( to use for non-maximum suppression. Defaults to 0.5. Returns: - Dict[int, List[int]]: Mapping from prediction indices - to keep to a list of prediction indices to be merged. + List[List[int]]: Groups of prediction indices be merged. + Each group may have 1 or more elements. """ - keep_to_merge_list: Dict[int, List[int]] = {} + merge_groups: List[List[int]] = [] scores = predictions[:, 4] order = scores.argsort() while len(order) > 0: - idx = order[-1] - merge_candidate = np.expand_dims(predictions[idx], axis=0) + idx = int(order[-1]) order = order[:-1] if len(order) == 0: - keep_to_merge_list[idx.tolist()] = [] + merge_groups.append([idx]) break + merge_candidate = np.expand_dims(predictions[idx], axis=0) ious = box_iou_batch(predictions[order][:, :4], merge_candidate[:, :4]) ious = ious.flatten() above_threshold = ious >= iou_threshold - keep_to_merge_list[idx] = np.flip(order[above_threshold]).tolist() + merge_group = [idx] + np.flip(order[above_threshold]).tolist() + merge_groups.append(merge_group) order = order[~above_threshold] - - return keep_to_merge_list + return merge_groups -def box_non_max_merge_batch( - predictions: npt.NDArray[np.float64], iou_threshold: float = 0.5 -) -> Dict[int, List[int]]: +def box_non_max_merge( + predictions: npt.NDArray[np.float64], + iou_threshold: float = 0.5, +) -> List[List[int]]: """ Apply greedy version of non-maximum merging per category to avoid detecting too many overlapping bounding boxes for a given object. 
Args: - predictions (npt.NDArray[np.float64]): An array of shape `(n, 6)` containing - the bounding boxes coordinates in format `[x1, y1, x2, y2]`, - the confidence scores and class_ids. + predictions (npt.NDArray[np.float64]): An array of shape `(n, 5)` or `(n, 6)` + containing the bounding boxes coordinates in format `[x1, y1, x2, y2]`, + the confidence scores and class_ids. Omit class_id column to allow + detections of different classes to be merged. iou_threshold (float, optional): The intersection-over-union threshold to use for non-maximum suppression. Defaults to 0.5. Returns: - Dict[int, List[int]]: Mapping from prediction indices - to keep to a list of prediction indices to be merged. + List[List[int]]: Groups of prediction indices be merged. + Each group may have 1 or more elements. """ + if predictions.shape[1] == 5: + return _box_non_max_merge_all(predictions, iou_threshold) + category_ids = predictions[:, 5] - keep_to_merge_list = {} + merge_groups = [] for category_id in np.unique(category_ids): curr_indices = np.where(category_ids == category_id)[0] - curr_keep_to_merge_list = box_non_max_merge( + merge_class_groups = _box_non_max_merge_all( predictions[curr_indices], iou_threshold ) - curr_indices_list = curr_indices.tolist() - for curr_keep, curr_merge_list in curr_keep_to_merge_list.items(): - keep = curr_indices_list[curr_keep] - merge_list = [curr_indices_list[i] for i in curr_merge_list] - keep_to_merge_list[keep] = merge_list - return keep_to_merge_list + + for merge_class_group in merge_class_groups: + merge_groups.append(curr_indices[merge_class_group].tolist()) + + for merge_group in merge_groups: + if len(merge_group) == 0: + raise ValueError( + f"Empty group detected when non-max-merging " + f"detections: {merge_groups}" + ) + return merge_groups def clip_boxes(xyxy: np.ndarray, resolution_wh: Tuple[int, int]) -> np.ndarray: @@ -552,7 +567,8 @@ def approximate_polygon( approximated_points = polygon while True: epsilon += epsilon_step - 
new_approximated_points = cv2.approxPolyDP(polygon, epsilon, closed=True) + new_approximated_points = cv2.approxPolyDP( + polygon, epsilon, closed=True) if len(new_approximated_points) > target_points: approximated_points = new_approximated_points else: @@ -581,7 +597,8 @@ def extract_ultralytics_masks(yolov8_results) -> Optional[np.ndarray]: ) top, left = int(pad[1]), int(pad[0]) - bottom, right = int(inference_shape[0] - pad[1]), int(inference_shape[1] - pad[0]) + bottom, right = int( + inference_shape[0] - pad[1]), int(inference_shape[1] - pad[0]) mask_maps = [] masks = yolov8_results.masks.data.cpu().numpy() @@ -648,7 +665,8 @@ def process_roboflow_result( polygon = np.array( [[point["x"], point["y"]] for point in prediction["points"]], dtype=int ) - mask = polygon_to_mask(polygon, resolution_wh=(image_width, image_height)) + mask = polygon_to_mask( + polygon, resolution_wh=(image_width, image_height)) xyxy.append([x_min, y_min, x_max, y_max]) class_id.append(prediction["class_id"]) class_name.append(prediction["class"]) @@ -659,10 +677,12 @@ def process_roboflow_result( xyxy = np.array(xyxy) if len(xyxy) > 0 else np.empty((0, 4)) confidence = np.array(confidence) if len(confidence) > 0 else np.empty(0) - class_id = np.array(class_id).astype(int) if len(class_id) > 0 else np.empty(0) + class_id = np.array(class_id).astype( + int) if len(class_id) > 0 else np.empty(0) class_name = np.array(class_name) if len(class_name) > 0 else np.empty(0) masks = np.array(masks, dtype=bool) if len(masks) > 0 else None - tracker_id = np.array(tracker_ids).astype(int) if len(tracker_ids) > 0 else None + tracker_id = np.array(tracker_ids).astype( + int) if len(tracker_ids) > 0 else None data = {CLASS_NAME_DATA_FIELD: class_name} return xyxy, confidence, class_id, masks, tracker_id, data @@ -722,13 +742,15 @@ def move_masks( """ if offset[0] < 0 or offset[1] < 0: - raise ValueError(f"Offset values must be non-negative integers. 
Got: {offset}") + raise ValueError( + f"Offset values must be non-negative integers. Got: {offset}") - mask_array = np.full((masks.shape[0], resolution_wh[1], resolution_wh[0]), False) + mask_array = np.full( + (masks.shape[0], resolution_wh[1], resolution_wh[0]), False) mask_array[ :, - offset[1] : masks.shape[1] + offset[1], - offset[0] : masks.shape[2] + offset[0], + offset[1]: masks.shape[1] + offset[1], + offset[0]: masks.shape[2] + offset[0], ] = masks return mask_array @@ -794,8 +816,10 @@ def sum_over_mask(indices: np.ndarray, axis: tuple) -> np.ndarray: return np.tensordot(masks, indices, axes=axis) aggregation_axis = ([1, 2], [0, 1]) - centroid_x = sum_over_mask(horizontal_indices, aggregation_axis) / total_pixels - centroid_y = sum_over_mask(vertical_indices, aggregation_axis) / total_pixels + centroid_x = sum_over_mask( + horizontal_indices, aggregation_axis) / total_pixels + centroid_y = sum_over_mask( + vertical_indices, aggregation_axis) / total_pixels return np.column_stack((centroid_x, centroid_y)).astype(int) @@ -873,7 +897,8 @@ def merge_data( elif ndim > 1: merged_data[key] = np.vstack(merged_data[key]) else: - raise ValueError(f"Unexpected array dimension for key '{key}'.") + raise ValueError( + f"Unexpected array dimension for key '{key}'.") else: raise ValueError( f"Inconsistent data types for key '{key}'. 
Only np.ndarray and list " @@ -918,6 +943,7 @@ def get_data_item( else: raise TypeError(f"Unsupported index type: {type(index)}") else: - raise TypeError(f"Unsupported data type for key '{key}': {type(value)}") + raise TypeError( + f"Unsupported data type for key '{key}': {type(value)}") return subset_data diff --git a/test/detection/test_core.py b/test/detection/test_core.py index 31e56decd..bef511e53 100644 --- a/test/detection/test_core.py +++ b/test/detection/test_core.py @@ -5,7 +5,7 @@ import numpy as np import pytest -from supervision.detection.core import Detections, merge_object_detection_pair +from supervision.detection.core import Detections, _merge_inner_detection_object_pair from supervision.geometry.core import Position PREDICTIONS = np.array( @@ -193,7 +193,8 @@ DoesNotRaise(), ), # take only first detection by index slice (1, 3) (DETECTIONS, 10, None, pytest.raises(IndexError)), # index out of range - (DETECTIONS, [0, 2, 10], None, pytest.raises(IndexError)), # index out of range + (DETECTIONS, [0, 2, 10], None, pytest.raises( + IndexError)), # index out of range (DETECTIONS, np.array([0, 2, 10]), None, pytest.raises(IndexError)), ( DETECTIONS, @@ -482,7 +483,7 @@ def test_equal( data={"key_1": [1]}, ), DoesNotRaise(), - ), # Same confidence - merge box & mask, tiebreak to detection_1 + ), # Same confidence - merge box & mask, tie-break to detection_1 ( mock_detections( xyxy=[[0, 0, 20, 20]], @@ -512,7 +513,7 @@ def test_equal( ), # Different confidence, different area ( mock_detections( - xyxy=[[0, 0, 20, 20]], + xyxy=[[10, 10, 30, 30]], confidence=None, class_id=[1], mask=[np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=bool)], @@ -520,31 +521,79 @@ def test_equal( data={"key_1": [1]}, ), mock_detections( - xyxy=[[10, 10, 30, 30]], - confidence=[0.2], + xyxy=[[20, 20, 40, 40]], + confidence=None, class_id=[2], mask=[np.array([[0, 0, 0], [0, 1, 1], [0, 1, 1]], dtype=bool)], tracker_id=[2], data={"key_2": [2]}, ), mock_detections( - xyxy=[[0, 0, 
30, 30]], - confidence=[0.2], - class_id=[2], + xyxy=[[10, 10, 40, 40]], + confidence=None, + class_id=[1], mask=[np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]], dtype=bool)], - tracker_id=[2], - data={"key_2": [2]}, + tracker_id=[1], + data={"key_1": [1]}, ), DoesNotRaise(), - ), # merge with no confidence + ), # No confidence at all + ( + mock_detections( + xyxy=[[0, 0, 20, 20]], + confidence=None, + ), + mock_detections( + xyxy=[[10, 10, 30, 30]], + confidence=[0.2], + ), + None, + pytest.raises(ValueError), + ), # confidence: None + [x] + ( + mock_detections( + xyxy=[[0, 0, 20, 20]], + mask=[np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=bool)], + ), + mock_detections( + xyxy=[[10, 10, 30, 30]], + mask=None, + ), + None, + pytest.raises(ValueError), + ), # mask: None + [x] + ( + mock_detections( + xyxy=[[0, 0, 20, 20]], + tracker_id=[1] + ), + mock_detections( + xyxy=[[10, 10, 30, 30]], + tracker_id=None, + ), + None, + pytest.raises(ValueError), + ), # tracker_id: None + [] + ( + mock_detections( + xyxy=[[0, 0, 20, 20]], + class_id=[1] + ), + mock_detections( + xyxy=[[10, 10, 30, 30]], + class_id=None, + ), + None, + pytest.raises(ValueError), + ) # class_id: None + [] ], ) -def test_merge_object_detection_pair( +def test_merge_inner_detection_object_pair( detection_1: Detections, detection_2: Detections, expected_result: Optional[Detections], exception: Exception, ): with exception: - result = merge_object_detection_pair(detection_1, detection_2) + result = _merge_inner_detection_object_pair(detection_1, detection_2) assert result == expected_result diff --git a/test/detection/test_utils.py b/test/detection/test_utils.py index e6f330841..cb7537e19 100644 --- a/test/detection/test_utils.py +++ b/test/detection/test_utils.py @@ -6,7 +6,7 @@ from supervision.config import CLASS_NAME_DATA_FIELD from supervision.detection.utils import ( - box_non_max_merge, + _box_non_max_merge_all, box_non_max_suppression, calculate_masks_centroids, clip_boxes, @@ -134,67 +134,67 
@@ def test_box_non_max_suppression( ( np.empty(shape=(0, 5), dtype=float), 0.5, - {}, + [], DoesNotRaise(), ), ( np.array([[0, 0, 10, 10, 1.0]]), 0.5, - {0: []}, + [[0]], DoesNotRaise(), ), ( np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0]]), 0.5, - {1: [0]}, + [[1, 0]], DoesNotRaise(), ), # High overlap, tie-break to second det ( np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 0.99]]), 0.5, - {0: [1]}, + [[0, 1]], DoesNotRaise(), ), # High overlap, merge to high confidence ( np.array([[0, 0, 10, 10, 0.99], [0, 0, 9, 9, 1.0]]), 0.5, - {1: [0]}, + [[1, 0]], DoesNotRaise(), ), # (test symmetry) High overlap, merge to high confidence ( - np.array([[0, 0, 10, 10, 0.99], [0, 0, 9, 9, 1.0]]), + np.array([[0, 0, 10, 10, 0.90], [0, 0, 9, 9, 1.0]]), 0.5, - {1: [0]}, + [[1, 0]], DoesNotRaise(), ), # (test symmetry) High overlap, merge to high confidence ( np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0]]), 1.0, - {0: [], 1: []}, + [[1], [0]], DoesNotRaise(), ), # High IOU required ( np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0]]), 0.0, - {1: [0]}, + [[1, 0]], DoesNotRaise(), ), # No IOU required ( np.array([[0, 0, 10, 10, 1.0], [0, 0, 5, 5, 0.9]]), 0.25, - {0: [1]}, + [[0, 1]], DoesNotRaise(), ), # Below IOU requirement ( np.array([[0, 0, 10, 10, 1.0], [0, 0, 5, 5, 0.9]]), 0.26, - {0: [], 1: []}, + [[0], [1]], DoesNotRaise(), ), # Above IOU requirement ( np.array([[0, 0, 10, 10, 1.0], [0, 0, 9, 9, 1.0], [0, 0, 8, 8, 1.0]]), 0.5, - {2: [1, 0]}, + [[2, 1, 0]], DoesNotRaise(), ), # 3 boxes ( @@ -208,7 +208,7 @@ def test_box_non_max_suppression( ] ), 0.5, - {1: [0], 3: [2], 4: []}, + [[4], [3, 2], [1, 0]], DoesNotRaise(), ), # 5 boxes, 2 merges, 1 separate ( @@ -222,7 +222,7 @@ def test_box_non_max_suppression( ] ), 0.33, - {0: [], 2: [1], 4: [3]}, + [[4, 3], [2, 1], [0]], DoesNotRaise(), ), # sequential merge, half overlap ( @@ -236,7 +236,7 @@ def test_box_non_max_suppression( ] ), 0.33, - {0: [], 2: [3, 1], 4: []}, + [[2, 3, 1], [4], [0]], DoesNotRaise(), ), # confidence ], 
@@ -244,11 +244,13 @@ def test_box_non_max_suppression( def test_box_non_max_merge( predictions: np.ndarray, iou_threshold: float, - expected_result: Dict[int, List[int]], + expected_result: List[List[int]], exception: Exception, ) -> None: with exception: - result = box_non_max_merge(predictions=predictions, iou_threshold=iou_threshold) + result = _box_non_max_merge_all( + predictions=predictions, iou_threshold=iou_threshold + ) assert result == expected_result @@ -664,7 +666,8 @@ def test_filter_polygons_by_area( "image": {"width": 1000, "height": 1000}, }, ( - np.array([[175.0, 275.0, 225.0, 325.0], [450.0, 450.0, 550.0, 550.0]]), + np.array([[175.0, 275.0, 225.0, 325.0], + [450.0, 450.0, 550.0, 550.0]]), np.array([0.9, 0.8]), np.array([0, 7]), None, @@ -1118,8 +1121,10 @@ def test_calculate_masks_centroids( ), # two data dicts with the same field name and np.array values as 2D arrays ( [ - {"test_1": np.array([1, 2, 3]), "test_2": np.array(["a", "b", "c"])}, - {"test_1": np.array([3, 2, 1]), "test_2": np.array(["c", "b", "a"])}, + {"test_1": np.array([1, 2, 3]), + "test_2": np.array(["a", "b", "c"])}, + {"test_1": np.array([3, 2, 1]), + "test_2": np.array(["c", "b", "a"])}, ], { "test_1": np.array([1, 2, 3, 3, 2, 1]), @@ -1148,8 +1153,10 @@ def test_calculate_masks_centroids( ), # two data dicts with the same field name and 1D and 2D arrays values ( [ - {"test_1": np.array([1, 2, 3]), "test_2": np.array(["a", "b"])}, - {"test_1": np.array([3, 2, 1]), "test_2": np.array(["c", "b", "a"])}, + {"test_1": np.array([1, 2, 3]), + "test_2": np.array(["a", "b"])}, + {"test_1": np.array([3, 2, 1]), + "test_2": np.array(["c", "b", "a"])}, ], None, pytest.raises(ValueError), @@ -1160,7 +1167,8 @@ def test_calculate_masks_centroids( DoesNotRaise(), ), # two data dicts; one empty and one non-empty dict ( - [{"test_1": [], "test_2": []}, {"test_1": [1, 2, 3], "test_2": [1, 2, 3]}], + [{"test_1": [], "test_2": []}, { + "test_1": [1, 2, 3], "test_2": [1, 2, 3]}], {"test_1": [1, 
2, 3], "test_2": [1, 2, 3]}, DoesNotRaise(), ), # two data dicts; one empty and one non-empty dict; same keys From db1b4737fec31de88de5c0f946faf95a4ca88372 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 23 May 2024 13:04:09 +0000 Subject: [PATCH 23/26] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/utils.py | 54 ++++++++++++---------------------- test/detection/test_core.py | 17 ++++------- test/detection/test_utils.py | 18 ++++-------- 3 files changed, 30 insertions(+), 59 deletions(-) diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index b8b8f7c19..4beea2ed5 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -56,8 +56,7 @@ def box_area(box): top_left = np.maximum(boxes_true[:, None, :2], boxes_detection[:, :2]) bottom_right = np.minimum(boxes_true[:, None, 2:], boxes_detection[:, 2:]) - area_inter = np.prod( - np.clip(bottom_right - top_left, a_min=0, a_max=None), 2) + area_inter = np.prod(np.clip(bottom_right - top_left, a_min=0, a_max=None), 2) return area_inter / (area_true[:, None] + area_detection - area_inter) @@ -82,8 +81,7 @@ def _mask_iou_batch_split( masks_true_area = masks_true.sum(axis=(1, 2)) masks_detection_area = masks_detection.sum(axis=(1, 2)) - union_area = masks_true_area[:, None] + \ - masks_detection_area - intersection_area + union_area = masks_true_area[:, None] + masks_detection_area - intersection_area return np.divide( intersection_area, @@ -134,8 +132,7 @@ def mask_iou_batch( 1, ) for i in range(0, masks_true.shape[0], step): - ious.append(_mask_iou_batch_split( - masks_true[i: i + step], masks_detection)) + ious.append(_mask_iou_batch_split(masks_true[i : i + step], masks_detection)) return np.vstack(ious) @@ -165,8 +162,7 @@ def 
resize_masks(masks: np.ndarray, max_dimension: int = 640) -> np.ndarray: resized_masks = masks[:, yv, xv] - resized_masks = resized_masks.reshape( - masks.shape[0], new_height, new_width) + resized_masks = resized_masks.reshape(masks.shape[0], new_height, new_width) return resized_masks @@ -219,9 +215,8 @@ def mask_non_max_suppression( keep = np.ones(rows, dtype=bool) for i in range(rows): if keep[i]: - condition = (ious[i] > iou_threshold) & ( - categories[i] == categories) - keep[i + 1:] = np.where(condition[i + 1:], False, keep[i + 1:]) + condition = (ious[i] > iou_threshold) & (categories[i] == categories) + keep[i + 1 :] = np.where(condition[i + 1 :], False, keep[i + 1 :]) return keep[sort_index.argsort()] @@ -567,8 +562,7 @@ def approximate_polygon( approximated_points = polygon while True: epsilon += epsilon_step - new_approximated_points = cv2.approxPolyDP( - polygon, epsilon, closed=True) + new_approximated_points = cv2.approxPolyDP(polygon, epsilon, closed=True) if len(new_approximated_points) > target_points: approximated_points = new_approximated_points else: @@ -597,8 +591,7 @@ def extract_ultralytics_masks(yolov8_results) -> Optional[np.ndarray]: ) top, left = int(pad[1]), int(pad[0]) - bottom, right = int( - inference_shape[0] - pad[1]), int(inference_shape[1] - pad[0]) + bottom, right = int(inference_shape[0] - pad[1]), int(inference_shape[1] - pad[0]) mask_maps = [] masks = yolov8_results.masks.data.cpu().numpy() @@ -665,8 +658,7 @@ def process_roboflow_result( polygon = np.array( [[point["x"], point["y"]] for point in prediction["points"]], dtype=int ) - mask = polygon_to_mask( - polygon, resolution_wh=(image_width, image_height)) + mask = polygon_to_mask(polygon, resolution_wh=(image_width, image_height)) xyxy.append([x_min, y_min, x_max, y_max]) class_id.append(prediction["class_id"]) class_name.append(prediction["class"]) @@ -677,12 +669,10 @@ def process_roboflow_result( xyxy = np.array(xyxy) if len(xyxy) > 0 else np.empty((0, 4)) confidence = 
np.array(confidence) if len(confidence) > 0 else np.empty(0) - class_id = np.array(class_id).astype( - int) if len(class_id) > 0 else np.empty(0) + class_id = np.array(class_id).astype(int) if len(class_id) > 0 else np.empty(0) class_name = np.array(class_name) if len(class_name) > 0 else np.empty(0) masks = np.array(masks, dtype=bool) if len(masks) > 0 else None - tracker_id = np.array(tracker_ids).astype( - int) if len(tracker_ids) > 0 else None + tracker_id = np.array(tracker_ids).astype(int) if len(tracker_ids) > 0 else None data = {CLASS_NAME_DATA_FIELD: class_name} return xyxy, confidence, class_id, masks, tracker_id, data @@ -742,15 +732,13 @@ def move_masks( """ if offset[0] < 0 or offset[1] < 0: - raise ValueError( - f"Offset values must be non-negative integers. Got: {offset}") + raise ValueError(f"Offset values must be non-negative integers. Got: {offset}") - mask_array = np.full( - (masks.shape[0], resolution_wh[1], resolution_wh[0]), False) + mask_array = np.full((masks.shape[0], resolution_wh[1], resolution_wh[0]), False) mask_array[ :, - offset[1]: masks.shape[1] + offset[1], - offset[0]: masks.shape[2] + offset[0], + offset[1] : masks.shape[1] + offset[1], + offset[0] : masks.shape[2] + offset[0], ] = masks return mask_array @@ -816,10 +804,8 @@ def sum_over_mask(indices: np.ndarray, axis: tuple) -> np.ndarray: return np.tensordot(masks, indices, axes=axis) aggregation_axis = ([1, 2], [0, 1]) - centroid_x = sum_over_mask( - horizontal_indices, aggregation_axis) / total_pixels - centroid_y = sum_over_mask( - vertical_indices, aggregation_axis) / total_pixels + centroid_x = sum_over_mask(horizontal_indices, aggregation_axis) / total_pixels + centroid_y = sum_over_mask(vertical_indices, aggregation_axis) / total_pixels return np.column_stack((centroid_x, centroid_y)).astype(int) @@ -897,8 +883,7 @@ def merge_data( elif ndim > 1: merged_data[key] = np.vstack(merged_data[key]) else: - raise ValueError( - f"Unexpected array dimension for key '{key}'.") + 
raise ValueError(f"Unexpected array dimension for key '{key}'.") else: raise ValueError( f"Inconsistent data types for key '{key}'. Only np.ndarray and list " @@ -943,7 +928,6 @@ def get_data_item( else: raise TypeError(f"Unsupported index type: {type(index)}") else: - raise TypeError( - f"Unsupported data type for key '{key}': {type(value)}") + raise TypeError(f"Unsupported data type for key '{key}': {type(value)}") return subset_data diff --git a/test/detection/test_core.py b/test/detection/test_core.py index bef511e53..dc58c9e8c 100644 --- a/test/detection/test_core.py +++ b/test/detection/test_core.py @@ -193,8 +193,7 @@ DoesNotRaise(), ), # take only first detection by index slice (1, 3) (DETECTIONS, 10, None, pytest.raises(IndexError)), # index out of range - (DETECTIONS, [0, 2, 10], None, pytest.raises( - IndexError)), # index out of range + (DETECTIONS, [0, 2, 10], None, pytest.raises(IndexError)), # index out of range (DETECTIONS, np.array([0, 2, 10]), None, pytest.raises(IndexError)), ( DETECTIONS, @@ -550,7 +549,7 @@ def test_equal( None, pytest.raises(ValueError), ), # confidence: None + [x] - ( + ( mock_detections( xyxy=[[0, 0, 20, 20]], mask=[np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=bool)], @@ -563,10 +562,7 @@ def test_equal( pytest.raises(ValueError), ), # mask: None + [x] ( - mock_detections( - xyxy=[[0, 0, 20, 20]], - tracker_id=[1] - ), + mock_detections(xyxy=[[0, 0, 20, 20]], tracker_id=[1]), mock_detections( xyxy=[[10, 10, 30, 30]], tracker_id=None, @@ -575,17 +571,14 @@ def test_equal( pytest.raises(ValueError), ), # tracker_id: None + [] ( - mock_detections( - xyxy=[[0, 0, 20, 20]], - class_id=[1] - ), + mock_detections(xyxy=[[0, 0, 20, 20]], class_id=[1]), mock_detections( xyxy=[[10, 10, 30, 30]], class_id=None, ), None, pytest.raises(ValueError), - ) # class_id: None + [] + ), # class_id: None + [] ], ) def test_merge_inner_detection_object_pair( diff --git a/test/detection/test_utils.py b/test/detection/test_utils.py index 
cb7537e19..9a1fa8c93 100644 --- a/test/detection/test_utils.py +++ b/test/detection/test_utils.py @@ -666,8 +666,7 @@ def test_filter_polygons_by_area( "image": {"width": 1000, "height": 1000}, }, ( - np.array([[175.0, 275.0, 225.0, 325.0], - [450.0, 450.0, 550.0, 550.0]]), + np.array([[175.0, 275.0, 225.0, 325.0], [450.0, 450.0, 550.0, 550.0]]), np.array([0.9, 0.8]), np.array([0, 7]), None, @@ -1121,10 +1120,8 @@ def test_calculate_masks_centroids( ), # two data dicts with the same field name and np.array values as 2D arrays ( [ - {"test_1": np.array([1, 2, 3]), - "test_2": np.array(["a", "b", "c"])}, - {"test_1": np.array([3, 2, 1]), - "test_2": np.array(["c", "b", "a"])}, + {"test_1": np.array([1, 2, 3]), "test_2": np.array(["a", "b", "c"])}, + {"test_1": np.array([3, 2, 1]), "test_2": np.array(["c", "b", "a"])}, ], { "test_1": np.array([1, 2, 3, 3, 2, 1]), @@ -1153,10 +1150,8 @@ def test_calculate_masks_centroids( ), # two data dicts with the same field name and 1D and 2D arrays values ( [ - {"test_1": np.array([1, 2, 3]), - "test_2": np.array(["a", "b"])}, - {"test_1": np.array([3, 2, 1]), - "test_2": np.array(["c", "b", "a"])}, + {"test_1": np.array([1, 2, 3]), "test_2": np.array(["a", "b"])}, + {"test_1": np.array([3, 2, 1]), "test_2": np.array(["c", "b", "a"])}, ], None, pytest.raises(ValueError), @@ -1167,8 +1162,7 @@ def test_calculate_masks_centroids( DoesNotRaise(), ), # two data dicts; one empty and one non-empty dict ( - [{"test_1": [], "test_2": []}, { - "test_1": [1, 2, 3], "test_2": [1, 2, 3]}], + [{"test_1": [], "test_2": []}, {"test_1": [1, 2, 3], "test_2": [1, 2, 3]}], {"test_1": [1, 2, 3], "test_2": [1, 2, 3]}, DoesNotRaise(), ), # two data dicts; one empty and one non-empty dict; same keys From 0721bc289b8f9cea901ac3e9004e2b305f618c9b Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Thu, 23 May 2024 16:21:54 +0300 Subject: [PATCH 24/26] Remove _set_at_index --- supervision/detection/core.py | 27 --------------------------- 1 file changed, 
27 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 6abc8dadd..069eaf09c 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -1068,33 +1068,6 @@ def __setitem__(self, key: str, value: Union[np.ndarray, List]): self.data[key] = value - def _set_at_index(self, index: int, other: Detections): - """ - Set detection values (xyxy, confidence, ...) at a specified index - to those of another Detections object, at index 0. - - Args: - index (int): The index in current detection, where values - will be set. - other (Detections): Detections object with exactly one element - to set the values from. - - Raises: - ValueError: If `other` is not made of exactly one element. - """ - if len(other) != 1: - raise ValueError("Detection to set from must have exactly one element.") - - self.xyxy[index] = other.xyxy[0] - if self.mask is not None and other.mask is not None: - self.mask[index] = other.mask[0] - if self.confidence is not None and other.confidence is not None: - self.confidence[index] = other.confidence[0] - if self.class_id is not None and other.class_id is not None: - self.class_id[index] = other.class_id[0] - if self.tracker_id is not None and other.tracker_id is not None: - self.tracker_id[index] = other.tracker_id[0] - @property def area(self) -> np.ndarray: """ From 530e1d01e152e45bd9f5bb37553f8bacbc6aeb75 Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Mon, 27 May 2024 16:17:27 +0300 Subject: [PATCH 25/26] Address comments --- supervision/detection/core.py | 52 ++++++++++++++--------------------- test/detection/test_core.py | 4 +-- 2 files changed, 23 insertions(+), 33 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 069eaf09c..f85d403d7 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -1203,7 +1203,7 @@ def with_nmm( result = [] for merge_group in merge_groups: unmerged_detections = [self[i] for i in 
merge_group] - merged_detections = _merge_inner_detections_objects( + merged_detections = merge_inner_detections_objects( unmerged_detections, threshold ) result.append(merged_detections) @@ -1211,7 +1211,7 @@ def with_nmm( return Detections.merge(result) -def _merge_inner_detection_object_pair( +def merge_inner_detection_object_pair( detections_1: Detections, detections_2: Detections ) -> Detections: """ @@ -1259,29 +1259,23 @@ def _merge_inner_detection_object_pair( if len(detections_1) != 1 or len(detections_2) != 1: raise ValueError("Both Detections should have exactly 1 detected object.") - _verify_fields_both_defined_or_none(detections_1, detections_2) + validate_fields_both_defined_or_none(detections_1, detections_2) + xyxy_1 = detections_1.xyxy[0] + xyxy_2 = detections_2.xyxy[0] if detections_1.confidence is None and detections_2.confidence is None: merged_confidence = None else: - area_det1 = (detections_1.xyxy[0][2] - detections_1.xyxy[0][0]) * ( - detections_1.xyxy[0][3] - detections_1.xyxy[0][1] - ) - area_det2 = (detections_2.xyxy[0][2] - detections_2.xyxy[0][0]) * ( - detections_2.xyxy[0][3] - detections_2.xyxy[0][1] - ) + detection_1_area = (xyxy_1[2] - xyxy_1[0]) * (xyxy_1[3] - xyxy_1[1]) + detections_2_area = (xyxy_2[2] - xyxy_2[0]) * (xyxy_2[3] - xyxy_2[1]) merged_confidence = ( - area_det1 * detections_1.confidence[0] - + area_det2 * detections_2.confidence[0] - ) / (area_det1 + area_det2) + detection_1_area * detections_1.confidence[0] + + detections_2_area * detections_2.confidence[0] + ) / (detection_1_area + detections_2_area) merged_confidence = np.array([merged_confidence]) - merged_x1, merged_y1 = np.minimum( - detections_1.xyxy[0][:2], detections_2.xyxy[0][:2] - ) - merged_x2, merged_y2 = np.maximum( - detections_1.xyxy[0][2:], detections_2.xyxy[0][2:] - ) + merged_x1, merged_y1 = np.minimum(xyxy_1[:2], xyxy_2[:2]) + merged_x2, merged_y2 = np.maximum(xyxy_1[2:], xyxy_2[2:]) merged_xyxy = np.array([[merged_x1, merged_y1, merged_x2, 
merged_y2]]) if detections_1.mask is None and detections_2.mask is None: @@ -1290,27 +1284,23 @@ def _merge_inner_detection_object_pair( merged_mask = np.logical_or(detections_1.mask, detections_2.mask) if detections_1.confidence is None and detections_2.confidence is None: - winning_det = detections_1 + winning_detection = detections_1 elif detections_1.confidence[0] >= detections_2.confidence[0]: - winning_det = detections_1 + winning_detection = detections_1 else: - winning_det = detections_2 - - winning_class_id = winning_det.class_id - winning_tracker_id = winning_det.tracker_id - winning_data = winning_det.data + winning_detection = detections_2 return Detections( xyxy=merged_xyxy, mask=merged_mask, confidence=merged_confidence, - class_id=winning_class_id, - tracker_id=winning_tracker_id, - data=winning_data, + class_id=winning_detection.class_id, + tracker_id=winning_detection.tracker_id, + data=winning_detection.data, ) -def _merge_inner_detections_objects( +def merge_inner_detections_objects( detections: List[Detections], threshold=0.5 ) -> Detections: """ @@ -1326,11 +1316,11 @@ def _merge_inner_detections_objects( box_iou = box_iou_batch(detections_1.xyxy, detections_2.xyxy)[0] if box_iou < threshold: break - detections_1 = _merge_inner_detection_object_pair(detections_1, detections_2) + detections_1 = merge_inner_detection_object_pair(detections_1, detections_2) return detections_1 -def _verify_fields_both_defined_or_none( +def validate_fields_both_defined_or_none( detections_1: Detections, detections_2: Detections ) -> None: """ diff --git a/test/detection/test_core.py b/test/detection/test_core.py index dc58c9e8c..af1d58762 100644 --- a/test/detection/test_core.py +++ b/test/detection/test_core.py @@ -5,7 +5,7 @@ import numpy as np import pytest -from supervision.detection.core import Detections, _merge_inner_detection_object_pair +from supervision.detection.core import Detections, merge_inner_detection_object_pair from supervision.geometry.core 
import Position PREDICTIONS = np.array( @@ -588,5 +588,5 @@ def test_merge_inner_detection_object_pair( exception: Exception, ): with exception: - result = _merge_inner_detection_object_pair(detection_1, detection_2) + result = merge_inner_detection_object_pair(detection_1, detection_2) assert result == expected_result From 2ee9e08446a071c50ff8acf000f80fdc0bb6c0a9 Mon Sep 17 00:00:00 2001 From: Linas Kondrackis Date: Mon, 27 May 2024 16:21:40 +0300 Subject: [PATCH 26/26] Renamed to group_overlapping_boxes --- supervision/detection/utils.py | 6 +++--- test/detection/test_utils.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/supervision/detection/utils.py b/supervision/detection/utils.py index 4beea2ed5..74726995e 100644 --- a/supervision/detection/utils.py +++ b/supervision/detection/utils.py @@ -275,7 +275,7 @@ def box_non_max_suppression( return keep[sort_index.argsort()] -def _box_non_max_merge_all( +def group_overlapping_boxes( predictions: npt.NDArray[np.float64], iou_threshold: float = 0.5 ) -> List[List[int]]: """ @@ -338,13 +338,13 @@ def box_non_max_merge( Each group may have 1 or more elements. 
""" if predictions.shape[1] == 5: - return _box_non_max_merge_all(predictions, iou_threshold) + return group_overlapping_boxes(predictions, iou_threshold) category_ids = predictions[:, 5] merge_groups = [] for category_id in np.unique(category_ids): curr_indices = np.where(category_ids == category_id)[0] - merge_class_groups = _box_non_max_merge_all( + merge_class_groups = group_overlapping_boxes( predictions[curr_indices], iou_threshold ) diff --git a/test/detection/test_utils.py b/test/detection/test_utils.py index 9a1fa8c93..b62faa619 100644 --- a/test/detection/test_utils.py +++ b/test/detection/test_utils.py @@ -6,12 +6,12 @@ from supervision.config import CLASS_NAME_DATA_FIELD from supervision.detection.utils import ( - _box_non_max_merge_all, box_non_max_suppression, calculate_masks_centroids, clip_boxes, filter_polygons_by_area, get_data_item, + group_overlapping_boxes, mask_non_max_suppression, merge_data, move_boxes, @@ -241,14 +241,14 @@ def test_box_non_max_suppression( ), # confidence ], ) -def test_box_non_max_merge( +def test_group_overlapping_boxes( predictions: np.ndarray, iou_threshold: float, expected_result: List[List[int]], exception: Exception, ) -> None: with exception: - result = _box_non_max_merge_all( + result = group_overlapping_boxes( predictions=predictions, iou_threshold=iou_threshold )