@@ -38,6 +38,8 @@ def assign_boxes_to_levels(box_lists, min_level, max_level, canonical_box_size,
38
38
level_assignments = torch .floor (
39
39
canonical_level + torch .log2 (box_sizes / canonical_box_size + eps )
40
40
)
41
+ # clamp level to [min_level, max_level], in case the box size is too large or too small
42
+ # for the available feature maps
41
43
level_assignments = torch .clamp (level_assignments , min = min_level , max = max_level )
42
44
return level_assignments .to (torch .int64 ) - min_level
43
45
@@ -100,15 +102,25 @@ def __init__(
100
102
e.g., 14 x 14. If tuple or list is given, the length must be 2.
101
103
scales (list[float]): The scale for each low-level pooling op relative to
102
104
the input image. For a feature map with stride s relative to the input
103
- image, scale is defined as a 1 / s.
105
+ image, scale is defined as 1 / s. The stride must be a power of 2.
106
+ When there are multiple scales, they must form a pyramid, i.e. they must be
107
+ a monotonically decreasing geometric sequence with a factor of 1/2.
104
108
sampling_ratio (int): The `sampling_ratio` parameter for the ROIAlign op.
105
109
pooler_type (string): Name of the type of pooling operation that should be applied.
106
110
For instance, "ROIPool" or "ROIAlignV2".
107
111
canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). The default
108
112
is heuristically defined as 224 pixels in the FPN paper (based on ImageNet
109
113
pre-training).
110
- canonical_level (int): The feature map level index on which a canonically-sized box
111
- should be placed. The default is defined as level 4 in the FPN paper.
114
+ canonical_level (int): The feature map level index from which a canonically-sized box
115
+ should be placed. The default is defined as level 4 (stride=16) in the FPN paper,
116
+ i.e., a box of size 224x224 will be placed on the feature with stride=16.
117
+ The box placement for all boxes will be determined from their sizes w.r.t
118
+ canonical_box_size. For example, a box whose area is 4x that of a canonical box
119
+ should be used to pool features from feature level ``canonical_level+1``.
120
+
121
+ Note that the actual input feature maps given to this module may not have
122
+ sufficiently many levels for the input boxes. If the boxes are too large or too
123
+ small for the input feature maps, the closest level will be used.
112
124
"""
113
125
super ().__init__ ()
114
126
@@ -148,22 +160,32 @@ def __init__(
148
160
# assumption that stride is a power of 2.
149
161
min_level = - math .log2 (scales [0 ])
150
162
max_level = - math .log2 (scales [- 1 ])
151
- assert math .isclose (min_level , int (min_level )) and math .isclose (max_level , int (max_level ))
163
+ assert math .isclose (min_level , int (min_level )) and math .isclose (
164
+ max_level , int (max_level )
165
+ ), "Featuremap stride is not power of 2!"
152
166
self .min_level = int (min_level )
153
167
self .max_level = int (max_level )
168
+ assert (
169
+ len (scales ) == self .max_level - self .min_level + 1
170
+ ), "[ROIPooler] Sizes of input featuremaps do not form a pyramid!"
154
171
assert 0 < self .min_level and self .min_level <= self .max_level
155
- assert self .min_level <= canonical_level and canonical_level <= self .max_level
172
+ if len (scales ) > 1 :
173
+ # When there is only one feature map, canonical_level is redundant and we should not
174
+ # require it to be a sensible value. Therefore we skip this assertion
175
+ assert self .min_level <= canonical_level and canonical_level <= self .max_level
156
176
self .canonical_level = canonical_level
157
177
assert canonical_box_size > 0
158
178
self .canonical_box_size = canonical_box_size
159
179
160
180
def forward (self , x , box_lists ):
161
181
"""
162
182
Args:
163
- x (list[Tensor]): A list of feature maps with scales matching those used to
164
- construct this module.
183
+ x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those
184
+ used to construct this module.
165
185
box_lists (list[Boxes] | list[RotatedBoxes]):
166
186
A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch.
187
+ The box coordinates are defined on the original image and
188
+ will be scaled by the `scales` argument of :class:`ROIPooler`.
167
189
168
190
Returns:
169
191
Tensor:
@@ -172,6 +194,9 @@ def forward(self, x, box_lists):
172
194
"""
173
195
num_level_assignments = len (self .level_poolers )
174
196
197
+ assert isinstance (x , list ) and isinstance (
198
+ box_lists , list
199
+ ), "Arguments to pooler must be lists"
175
200
assert (
176
201
len (x ) == num_level_assignments
177
202
), "unequal value, num_level_assignments={}, but x is list of {} Tensors" .format (
0 commit comments