Modify FCOS-RT: switch neck to fcos_rt_pafpn, use SiLU/BN in the head, add aligned SimOTA matcher and loss stubs

yjh0410 · yjh0410 · commit da836aff96af · 2023-12-11T23:00:14.000+08:00
diff --git a/config/fcos_config.py b/config/fcos_config.py
@@ -482,17 +482,19 @@
         'max_stride': 32,
         'out_stride': [8, 16, 32],
         ## Neck
-        'neck': 'pafpn',
-        'fpn_p6_feat': False,
-        'fpn_p7_feat': False,
-        'fpn_p6_from_c5': False,
+        'neck': 'fcos_rt_pafpn',
+        'use_spp': True,
+        'spp_act': 'silu',
+        'spp_norm': 'BN',
+        'depth': 3,
+        'fpn_depthwise': False,
         ## Head
         'head': 'fcos_head',
         'head_dim': 256,
         'num_cls_head': 4,
         'num_reg_head': 4,
-        'head_act': 'relu',
-        'head_norm': 'GN',
+        'head_act': 'silu',
+        'head_norm': 'BN',
         ## Post-process
         'train_topk': 1000,
         'train_conf_thresh': 0.05,
@@ -502,7 +504,7 @@
         'test_nms_thresh': 0.45,
         'nms_class_agnostic': True,  # We prefer to use class-agnostic NMS in the demo.
         # ----------------- Label Assignment -----------------
-        'matcher': 'fcos_matcher',
+        'matcher': 'aligned_simota',
         'matcher_hpy':{'center_sampling_radius': 1.5,
                        'object_sizes_of_interest': [[-1, 64], [64, 128], [128, float('inf')]]
                        },
@@ -511,164 +513,8 @@
         'focal_loss_alpha': 0.25,
         'focal_loss_gamma': 2.0,
         'loss_cls_weight': 1.0,
-        'loss_reg_weight': 1.0,
-        'loss_ctn_weight': 1.0,
-        # ----------------- Training -----------------
-        ## Training scheduler
-        'scheduler': '3x',
-        ## Optimizer
-        'optimizer': 'sgd',
-        'base_lr': 0.01 / 16,
-        'backbone_lr_ratio': 1.0 / 1.0,
-        'momentum': 0.9,
-        'weight_decay': 1e-4,
-        'clip_max_norm': -1.0,
-        ## LR Scheduler
-        'lr_scheduler': 'step',
-        'warmup': 'linear',
-        'warmup_iters': 500,
-        'warmup_factor': 0.00066667,
-        ## Epoch
-        'max_epoch': 36,       # 3x
-        'lr_epoch': [24, 33],  # 3x
-        # ----------------- Input -----------------
-        ## Transforms
-        'train_min_size': [256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608],   # short edge of image
-        'train_max_size': 900,
-        'test_min_size': 512,
-        'test_max_size': 736,
-        ## Pixel mean & std
-        'pixel_mean': [0.485, 0.456, 0.406],
-        'pixel_std':  [0.229, 0.224, 0.225],
-        ## Transforms
-        'detr_style': False,
-        'trans_config': [
-            {'name': 'RandomHFlip'},
-            {'name': 'RandomResize'},
-        ],
-        'normalize_coords': False,
-    },
-
-    'fcos_rt_r50_3x':{
-        # ----------------- Model-----------------
-        ## Backbone
-        'backbone': 'resnet50',
-        'backbone_norm': 'FrozeBN',
-        'res5_dilation': False,
-        'pretrained': True,
-        'pretrained_weight': 'imagenet1k_v1',
-        'max_stride': 32,
-        'out_stride': [8, 16, 32],
-        ## Neck
-        'neck': 'pafpn',
-        'fpn_p6_feat': False,
-        'fpn_p7_feat': False,
-        'fpn_p6_from_c5': False,
-        ## Head
-        'head': 'fcos_head',
-        'head_dim': 256,
-        'num_cls_head': 4,
-        'num_reg_head': 4,
-        'head_act': 'relu',
-        'head_norm': 'GN',
-        ## Post-process
-        'train_topk': 1000,
-        'train_conf_thresh': 0.05,
-        'train_nms_thresh': 0.65,
-        'test_topk': 100,
-        'test_conf_thresh': 0.5,
-        'test_nms_thresh': 0.45,
-        'nms_class_agnostic': True,  # We prefer to use class-agnostic NMS in the demo.
-        # ----------------- Label Assignment -----------------
-        'matcher': 'fcos_matcher',
-        'matcher_hpy':{'center_sampling_radius': 1.5,
-                       'object_sizes_of_interest': [[-1, 64], [64, 128], [128, float('inf')]]
-                       },
-        # ----------------- Loss weight -----------------
-        ## Loss hyper-parameters
-        'focal_loss_alpha': 0.25,
-        'focal_loss_gamma': 2.0,
-        'loss_cls_weight': 1.0,
-        'loss_reg_weight': 1.0,
-        'loss_ctn_weight': 1.0,
-        # ----------------- Training -----------------
-        ## Training scheduler
-        'scheduler': '3x',
-        ## Optimizer
-        'optimizer': 'sgd',
-        'base_lr': 0.01 / 16,
-        'backbone_lr_ratio': 1.0 / 1.0,
-        'momentum': 0.9,
-        'weight_decay': 1e-4,
-        'clip_max_norm': -1.0,
-        ## LR Scheduler
-        'lr_scheduler': 'step',
-        'warmup': 'linear',
-        'warmup_iters': 500,
-        'warmup_factor': 0.00066667,
-        ## Epoch
-        'max_epoch': 36,       # 3x
-        'lr_epoch': [24, 33],  # 3x
-        # ----------------- Input -----------------
-        ## Transforms
-        'train_min_size': [256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608],   # short edge of image
-        'train_max_size': 900,
-        'test_min_size': 512,
-        'test_max_size': 736,
-        ## Pixel mean & std
-        'pixel_mean': [0.485, 0.456, 0.406],
-        'pixel_std':  [0.229, 0.224, 0.225],
-        ## Transforms
-        'detr_style': False,
-        'trans_config': [
-            {'name': 'RandomHFlip'},
-            {'name': 'RandomResize'},
-        ],
-        'normalize_coords': False,
-    },
-
-    'fcos_rt_r101_3x':{
-        # ----------------- Model-----------------
-        ## Backbone
-        'backbone': 'resnet101',
-        'backbone_norm': 'FrozeBN',
-        'res5_dilation': False,
-        'pretrained': True,
-        'pretrained_weight': 'imagenet1k_v1',
-        'max_stride': 32,
-        'out_stride': [8, 16, 32],
-        ## Neck
-        'neck': 'pafpn',
-        'fpn_p6_feat': False,
-        'fpn_p7_feat': False,
-        'fpn_p6_from_c5': False,
-        ## Head
-        'head': 'fcos_head',
-        'head_dim': 256,
-        'num_cls_head': 4,
-        'num_reg_head': 4,
-        'head_act': 'relu',
-        'head_norm': 'GN',
-        ## Post-process
-        'train_topk': 1000,
-        'train_conf_thresh': 0.05,
-        'train_nms_thresh': 0.65,
-        'test_topk': 100,
-        'test_conf_thresh': 0.5,
-        'test_nms_thresh': 0.45,
-        'nms_class_agnostic': True,  # We prefer to use class-agnostic NMS in the demo.
-        # ----------------- Label Assignment -----------------
-        'matcher': 'fcos_matcher',
-        'matcher_hpy':{'center_sampling_radius': 1.5,
-                       'object_sizes_of_interest': [[-1, 64], [64, 128], [128, float('inf')]]
-                       },
-        # ----------------- Loss weight -----------------
-        ## Loss hyper-parameters
-        'focal_loss_alpha': 0.25,
-        'focal_loss_gamma': 2.0,
-        'loss_cls_weight': 1.0,
-        'loss_reg_weight': 1.0,
-        'loss_ctn_weight': 1.0,
+        'loss_reg_weight': 2.0,
+        'loss_ctn_weight': 0.5,
         # ----------------- Training -----------------
         ## Training scheduler
         'scheduler': '3x',
diff --git a/models/detectors/fcos/criterion.py b/models/detectors/fcos/criterion.py
@@ -78,7 +78,7 @@ def loss_bboxes(self, pred_delta, tgt_delta, bbox_quality=None, num_boxes=1.0):
 
         return loss_box.sum() / num_boxes
 
-    def forward(self, outputs, targets):
+    def fcos_loss(self, outputs, targets):
         """
             outputs['pred_cls']: (Tensor) [B, M, C]
             outputs['pred_reg']: (Tensor) [B, M, 4]
@@ -139,6 +139,26 @@ def forward(self, outputs, targets):
 
         return loss_dict
     
+    def ota_loss(self, outputs, targets):  # loss path used when cfg['matcher'] == "aligned_simota"; still a stub
+        raise NotImplementedError("ota_loss for the 'aligned_simota' matcher is not implemented yet")
+    
+    def forward(self, outputs, targets):
+        """Dispatch to the loss routine selected by cfg['matcher'].
+
+            outputs['pred_cls']: (Tensor) [B, M, C]
+            outputs['pred_reg']: (Tensor) [B, M, 4]
+            outputs['pred_ctn']: (Tensor) [B, M, 1]
+            outputs['strides']: (List) [8, 16, 32, ...] stride of the model output
+            targets: (List) [dict{'boxes': [...], 'labels': [...], 'orig_size': ...}, ...]
+        Raises NotImplementedError when the configured matcher is not handled here.
+        """
+        matcher_name = self.cfg['matcher']
+        if matcher_name == "fcos_matcher":
+            return self.fcos_loss(outputs, targets)
+        if matcher_name == "aligned_simota":
+            return self.ota_loss(outputs, targets)
+        raise NotImplementedError
+            
 
 # build criterion
 def build_criterion(cfg, device, num_classes=80):
diff --git a/models/detectors/fcos/matcher.py b/models/detectors/fcos/matcher.py
@@ -222,3 +222,9 @@ def __call__(self, fpn_strides, anchors, targets):
 
         # [B, M], [B, M, 4], [B, M]
         return torch.stack(gt_classes), torch.stack(gt_anchors_deltas), torch.stack(gt_centerness)
+
+
+class AlignedSimOtaMatcher:
+    def __init__(self) -> None:
+        """Stub constructor; this class defines no state or matching logic yet."""
+    
diff --git a/models/neck/__init__.py b/models/neck/__init__.py
@@ -1,5 +1,5 @@
 from .dilated_encoder import DilatedEncoder
-from .fpn import BasicFPN, PaFPN, DETRXPaFPN
+from .fpn import BasicFPN, FcosRTPaFPN, DETRXPaFPN
 
 
 # build neck
@@ -22,12 +22,14 @@ def build_neck(cfg, in_dim, out_dim):
                          p7_feat = cfg['fpn_p7_feat'],
                          from_c5 = cfg['fpn_p6_from_c5'], 
                          )
-    elif cfg['neck'] == 'pafpn':
-        model = PaFPN(in_dims = in_dim,
-                      out_dim = out_dim,
-                      p6_feat = cfg['fpn_p6_feat'],
-                      p7_feat = cfg['fpn_p7_feat'],
-                      )
+    elif cfg['neck'] == 'fcos_rt_pafpn':
+        model = FcosRTPaFPN(cfg     = cfg,
+                            in_dims = in_dim,
+                            out_dim = out_dim,
+                            depth   = cfg['depth'],
+                            use_spp = cfg['use_spp'],
+                            depthwise = cfg['fpn_depthwise']
+                            )
     elif cfg['neck'] == 'detrx_pafpn':
         model = DETRXPaFPN(in_dims = in_dim,
                            out_dim = out_dim,
@@ -37,5 +39,7 @@ def build_neck(cfg, in_dim, out_dim):
                            from_p5 = False,
                            depthwise = cfg['fpn_depthwise']
                            )
+    else:
+        raise NotImplementedError
         
     return model
diff --git a/models/neck/fpn.py b/models/neck/fpn.py
diff --git a/models/neck/spp.py b/models/neck/spp.py