update docs for poolers

ppwwyyxx · facebook-github-bot · commit ffa5213215bc · 2019-11-08T19:57:03.000-08:00
Summary: Pull Request resolved: fairinternal/detectron2#322 Differential Revision: D18395920 Pulled By: ppwwyyxx fbshipit-source-id: 6c03e9474f289fcc293de5bfcd373f8cbc393914
diff --git a/detectron2/data/dataset_mapper.py b/detectron2/data/dataset_mapper.py
@@ -1,7 +1,6 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 import copy
 import logging
-
 import numpy as np
 import torch
 from fvcore.common.file_io import PathManager
diff --git a/detectron2/modeling/poolers.py b/detectron2/modeling/poolers.py
@@ -38,6 +38,8 @@ def assign_boxes_to_levels(box_lists, min_level, max_level, canonical_box_size,
     level_assignments = torch.floor(
         canonical_level + torch.log2(box_sizes / canonical_box_size + eps)
     )
+    # clamp level to (min, max), in case the box size is too large or too small
+    # for the available feature maps
     level_assignments = torch.clamp(level_assignments, min=min_level, max=max_level)
     return level_assignments.to(torch.int64) - min_level
 
@@ -100,15 +102,25 @@ def __init__(
                 e.g., 14 x 14. If tuple or list is given, the length must be 2.
             scales (list[float]): The scale for each low-level pooling op relative to
                 the input image. For a feature map with stride s relative to the input
-                image, scale is defined as a 1 / s.
+                image, scale is defined as a 1 / s. The stride must be power of 2.
+                When there are multiple scales, they must form a pyramid, i.e. they must be
+                a monotically decreasing geometric sequence with a factor of 1/2.
             sampling_ratio (int): The `sampling_ratio` parameter for the ROIAlign op.
             pooler_type (string): Name of the type of pooling operation that should be applied.
                 For instance, "ROIPool" or "ROIAlignV2".
             canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). The default
                 is heuristically defined as 224 pixels in the FPN paper (based on ImageNet
                 pre-training).
-            canonical_level (int): The feature map level index on which a canonically-sized box
-                should be placed. The default is defined as level 4 in the FPN paper.
+            canonical_level (int): The feature map level index from which a canonically-sized box
+                should be placed. The default is defined as level 4 (stride=16) in the FPN paper,
+                i.e., a box of size 224x224 will be placed on the feature with stride=16.
+                The box placement for all boxes will be determined from their sizes w.r.t
+                canonical_box_size. For example, a box whose area is 4x that of a canonical box
+                should be used to pool features from feature level ``canonical_level+1``.
+
+                Note that the actual input feature maps given to this module may not have
+                sufficiently many levels for the input boxes. If the boxes are too large or too
+                small for the input feature maps, the closest level will be used.
         """
         super().__init__()
 
@@ -148,22 +160,32 @@ def __init__(
         # assumption that stride is a power of 2.
         min_level = -math.log2(scales[0])
         max_level = -math.log2(scales[-1])
-        assert math.isclose(min_level, int(min_level)) and math.isclose(max_level, int(max_level))
+        assert math.isclose(min_level, int(min_level)) and math.isclose(
+            max_level, int(max_level)
+        ), "Featuremap stride is not power of 2!"
         self.min_level = int(min_level)
         self.max_level = int(max_level)
+        assert (
+            len(scales) == self.max_level - self.min_level + 1
+        ), "[ROIPooler] Sizes of input featuremaps do not form a pyramid!"
         assert 0 < self.min_level and self.min_level <= self.max_level
-        assert self.min_level <= canonical_level and canonical_level <= self.max_level
+        if len(scales) > 1:
+            # When there is only one feature map, canonical_level is redundant and we should not
+            # require it to be a sensible value. Therefore we skip this assertion
+            assert self.min_level <= canonical_level and canonical_level <= self.max_level
         self.canonical_level = canonical_level
         assert canonical_box_size > 0
         self.canonical_box_size = canonical_box_size
 
     def forward(self, x, box_lists):
         """
         Args:
-            x (list[Tensor]): A list of feature maps with scales matching those used to
-                construct this module.
+            x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those
+                used to construct this module.
             box_lists (list[Boxes] | list[RotatedBoxes]):
                 A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch.
+                The box coordinates are defined on the original image and
+                will be scaled by the `scales` argument of :class:`ROIPooler`.
 
         Returns:
             Tensor:
@@ -172,6 +194,9 @@ def forward(self, x, box_lists):
         """
         num_level_assignments = len(self.level_poolers)
 
+        assert isinstance(x, list) and isinstance(
+            box_lists, list
+        ), "Arguments to pooler must be lists"
         assert (
             len(x) == num_level_assignments
         ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format(
diff --git a/detectron2/structures/instances.py b/detectron2/structures/instances.py
@@ -131,6 +131,9 @@ def __len__(self) -> int:
             return len(v)
         raise NotImplementedError("Empty Instances does not support __len__!")
 
+    def __iter__(self):
+        raise NotImplementedError("`Instances` object is not iterable!")
+
     @staticmethod
     def cat(instance_lists: List["Instances"]) -> "Instances":
         """
diff --git a/setup.py b/setup.py
@@ -79,7 +79,7 @@ def get_extensions():
         "matplotlib",
         "tqdm>4.29.0",
         "tensorboard",
-        "fvcore @ git+https://github.com/facebookresearch/fvcore.git"
+        "fvcore @ git+https://github.com/facebookresearch/fvcore.git",
     ],
     extras_require={"all": ["shapely", "psutil"]},
     ext_modules=get_extensions(),
diff --git a/tests/test_rpn.py b/tests/test_rpn.py
@@ -32,7 +32,9 @@ def test_rpn(self):
         gt_instances = Instances(image_shape)
         gt_instances.gt_boxes = Boxes(gt_boxes)
         with EventStorage():  # capture events in a new storage to discard them
-            proposals, proposal_losses = proposal_generator(images, features, gt_instances)
+            proposals, proposal_losses = proposal_generator(
+                images, features, [gt_instances[0], gt_instances[1]]
+            )
 
         expected_losses = {
             "loss_rpn_cls": torch.tensor(0.0804563984),
@@ -92,7 +94,9 @@ def test_rrpn(self):
         gt_instances = Instances(image_shape)
         gt_instances.gt_boxes = RotatedBoxes(gt_boxes)
         with EventStorage():  # capture events in a new storage to discard them
-            proposals, proposal_losses = proposal_generator(images, features, gt_instances)
+            proposals, proposal_losses = proposal_generator(
+                images, features, [gt_instances[0], gt_instances[1]]
+            )
 
         expected_losses = {
             "loss_rpn_cls": torch.tensor(0.0432923734),