Skip to content

Commit 77a0f01

Browse files
committed
add PlainDETR
1 parent 461c284 commit 77a0f01

21 files changed

+1718
-577
lines changed

config/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from .retinanet_config import retinanet_cfg
33
from .fcos_config import fcos_cfg
44
from .yolof_config import yolof_cfg
5+
from .plain_detr_config import plain_detr_cfg
56

67

78
def build_config(args):
@@ -14,6 +15,9 @@ def build_config(args):
1415
# YOLOF
1516
elif args.model in yolof_cfg.keys():
1617
return yolof_cfg[args.model]
18+
# PlainDETR
19+
elif args.model in plain_detr_cfg.keys():
20+
return plain_detr_cfg[args.model]
1721

1822
else:
1923
print('Unknown Model: {}'.format(args.model))

config/fcos_config.py

Lines changed: 10 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
'backbone_norm': 'FrozeBN',
1111
'res5_dilation': False,
1212
'pretrained': True,
13+
'mae_pretrained': False,
1314
'pretrained_weight': 'imagenet1k_v1',
1415
'max_stride': 128,
1516
'out_stride': [8, 16, 32, 64, 128],
@@ -88,6 +89,7 @@
8889
'backbone_norm': 'FrozeBN',
8990
'res5_dilation': False,
9091
'pretrained': True,
92+
'mae_pretrained': False,
9193
'pretrained_weight': 'imagenet1k_v1',
9294
'max_stride': 128,
9395
'out_stride': [8, 16, 32, 64, 128],
@@ -166,6 +168,7 @@
166168
'backbone_norm': 'FrozeBN',
167169
'res5_dilation': False,
168170
'pretrained': True,
171+
'mae_pretrained': False,
169172
'pretrained_weight': 'imagenet1k_v1',
170173
'max_stride': 128,
171174
'out_stride': [8, 16, 32, 64, 128],
@@ -244,6 +247,7 @@
244247
'backbone_norm': 'FrozeBN',
245248
'res5_dilation': False,
246249
'pretrained': True,
250+
'mae_pretrained': False,
247251
'pretrained_weight': 'imagenet1k_v1',
248252
'max_stride': 128,
249253
'out_stride': [8, 16, 32, 64, 128],
@@ -322,6 +326,7 @@
322326
'backbone_norm': 'FrozeBN',
323327
'res5_dilation': False,
324328
'pretrained': True,
329+
'mae_pretrained': False,
325330
'pretrained_weight': 'imagenet1k_v1',
326331
'max_stride': 128,
327332
'out_stride': [8, 16, 32, 64, 128],
@@ -400,6 +405,7 @@
400405
'backbone_norm': 'FrozeBN',
401406
'res5_dilation': False,
402407
'pretrained': True,
408+
'mae_pretrained': False,
403409
'pretrained_weight': 'imagenet1k_v1',
404410
'max_stride': 128,
405411
'out_stride': [8, 16, 32, 64, 128],
@@ -479,6 +485,7 @@
479485
'backbone_norm': 'FrozeBN',
480486
'res5_dilation': False,
481487
'pretrained': True,
488+
'mae_pretrained': False,
482489
'pretrained_weight': 'imagenet1k_v2',
483490
'max_stride': 128,
484491
'out_stride': [8, 16, 32, 64, 128],
@@ -557,6 +564,7 @@
557564
'backbone_norm': 'FrozeBN',
558565
'res5_dilation': False,
559566
'pretrained': True,
567+
'mae_pretrained': False,
560568
'pretrained_weight': 'imagenet1k_v2',
561569
'max_stride': 128,
562570
'out_stride': [8, 16, 32, 64, 128],
@@ -635,6 +643,7 @@
635643
'backbone_norm': 'FrozeBN',
636644
'res5_dilation': False,
637645
'pretrained': True,
646+
'mae_pretrained': False,
638647
'pretrained_weight': 'imagenet1k_v2',
639648
'max_stride': 128,
640649
'out_stride': [8, 16, 32, 64, 128],
@@ -713,6 +722,7 @@
713722
'backbone_norm': 'FrozeBN',
714723
'res5_dilation': False,
715724
'pretrained': True,
725+
'mae_pretrained': False,
716726
'pretrained_weight': 'imagenet1k_v2',
717727
'max_stride': 128,
718728
'out_stride': [8, 16, 32, 64, 128],
@@ -784,87 +794,4 @@
784794
'normalize_coords': False,
785795
},
786796

787-
# Real-time FCOS
788-
'fcos_rt_r50_4x':{
789-
# ----------------- Model-----------------
790-
## Backbone
791-
'backbone': 'resnet50',
792-
'backbone_norm': 'FrozeBN',
793-
'res5_dilation': False,
794-
'pretrained': True,
795-
'pretrained_weight': 'imagenet1k_v1',
796-
'max_stride': 32,
797-
'out_stride': [8, 16, 32],
798-
## Neck
799-
'neck': 'fcos_rt_pafpn',
800-
'use_spp': True,
801-
'spp_pooling_size': 5,
802-
'spp_act': 'silu',
803-
'spp_norm': 'GN',
804-
'depth': 3,
805-
'fpn_act': 'silu',
806-
'fpn_norm': 'GN',
807-
'fpn_depthwise': False,
808-
## Head
809-
'head': 'fcos_head',
810-
'head_dim': 256,
811-
'num_cls_head': 4,
812-
'num_reg_head': 4,
813-
'head_act': 'silu',
814-
'head_norm': 'GN',
815-
## Post-process
816-
'train_topk': 1000,
817-
'train_conf_thresh': 0.05,
818-
'train_nms_thresh': 0.65,
819-
'test_topk': 100,
820-
'test_conf_thresh': 0.5,
821-
'test_nms_thresh': 0.45,
822-
'nms_class_agnostic': True, # We prefer to use class-agnostic NMS in the demo.
823-
# ----------------- Label Assignment -----------------
824-
'matcher': 'simota',
825-
'matcher_hpy':{'soft_center_radius': 2.5,
826-
'topk_candidates': 13,
827-
},
828-
# ----------------- Loss weight -----------------
829-
## Loss hyper-parameters
830-
'focal_loss_alpha': 0.25,
831-
'focal_loss_gamma': 2.0,
832-
'loss_cls_weight': 1.0,
833-
'loss_reg_weight': 2.0,
834-
'loss_ctn_weight': 0.5,
835-
# ----------------- Training -----------------
836-
## Training scheduler
837-
'scheduler': '4x',
838-
## Optimizer
839-
'optimizer': 'sgd',
840-
'base_lr': 0.01 / 16,
841-
'backbone_lr_ratio': 1.0 / 1.0,
842-
'momentum': 0.9,
843-
'weight_decay': 1e-4,
844-
'clip_max_norm': -1.0,
845-
## LR Scheduler
846-
'lr_scheduler': 'step',
847-
'warmup': 'linear',
848-
'warmup_iters': 500,
849-
'warmup_factor': 0.00066667,
850-
## Epoch
851-
'max_epoch': 48, # 4x
852-
'lr_epoch': [32, 44], # 4x
853-
# ----------------- Input -----------------
854-
## Transforms
855-
'train_min_size': [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640], # short edge of image
856-
'train_min_size2': [400, 500, 600],
857-
'train_max_size': 900,
858-
'test_min_size': 512,
859-
'test_max_size': 736,
860-
'random_crop_size': [320, 608],
861-
## Pixel mean & std
862-
'pixel_mean': [0.485, 0.456, 0.406],
863-
'pixel_std': [0.229, 0.224, 0.225],
864-
## Transforms
865-
'detr_style': True,
866-
'trans_config': None,
867-
'normalize_coords': False,
868-
},
869-
870797
}

config/plain_detr_config.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Plain DETR
2+
3+
plain_detr_cfg = {
4+
'rtpdetr_r50':{
5+
# ---------------- Model config ----------------
6+
## Model scale
7+
# Backbone
8+
'backbone': 'resnet50',
9+
'backbone_norm': 'FrozeBN',
10+
'pretrained': True,
11+
'mae_pretrained': True,
12+
'max_stride': 32,
13+
'out_stride': 16,
14+
# Transformer Encoder
15+
'hidden_dim': 256,
16+
'en_num_heads': 8,
17+
'en_num_layers': 6,
18+
'en_ffn_dim': 2048,
19+
'en_dropout': 0.1,
20+
'en_act': 'gelu',
21+
# Transformer Decoder
22+
'transformer': 'plain_detr_transformer',
23+
'de_num_heads': 8,
24+
'de_num_layers': 6,
25+
'de_ffn_dim': 2048,
26+
'de_dropout': 0.0,
27+
'de_act': 'gelu',
28+
'de_pre_norm': True,
29+
'rpe_hidden_dim': 512,
30+
'use_checkpoint': False,
31+
'proposal_feature_levels': 3,
32+
'proposal_tgt_strides': [8, 16, 32],
33+
'num_queries_one2one': 300,
34+
'num_queries_one2many': 1500,
35+
# ---------------- Assignment config ----------------
36+
'matcher_hpy': {'cost_class': 2.0,
37+
'cost_bbox': 1.0,
38+
'cost_giou': 2.0,},
39+
# ---------------- Loss config ----------------
40+
'k_one2many': 6,
41+
'lambda_one2many': 1.0,
42+
'loss_coeff': {'class': 2,
43+
'bbox': 1,
44+
'giou': 2,},
45+
# ----------------- Training -----------------
46+
## Optimizer
47+
'optimizer': 'adamw',
48+
'base_lr': 0.0002 / 16,
49+
'backbone_lr_ratio': 0.1,
50+
'momentum': None,
51+
'weight_decay': 0.05,
52+
'clip_max_norm': 0.1,
53+
## LR Scheduler
54+
'lr_scheduler': 'step',
55+
'warmup': 'linear',
56+
'warmup_iters': 1000,
57+
'warmup_factor': 0.00066667,
58+
## Training scheduler
59+
'scheduler': '1x',
60+
'max_epoch': 12, # 1x
61+
'lr_epoch': [11], # 1x
62+
# ----------------- Input -----------------
63+
## Transforms
64+
'train_min_size': [800], # short edge of image
65+
'train_min_size2': [400, 500, 600],
66+
'train_max_size': 1333,
67+
'test_min_size': 800,
68+
'test_max_size': 1333,
69+
'random_crop_size': [320, 600],
70+
## Pixel mean & std
71+
'pixel_mean': [0.485, 0.456, 0.406],
72+
'pixel_std': [0.229, 0.224, 0.225],
73+
## Transforms
74+
'detr_style': True,
75+
'trans_config': None,
76+
'normalize_coords': False,
77+
},
78+
79+
}

config/retinanet_config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
'backbone_norm': 'FrozeBN',
1111
'res5_dilation': False,
1212
'pretrained': True,
13+
'mae_pretrained': False,
1314
'pretrained_weight': 'imagenet1k_v1',
1415
'max_stride': 128,
1516
'out_stride': [8, 16, 32, 64, 128],
@@ -92,6 +93,7 @@
9293
'backbone_norm': 'FrozeBN',
9394
'res5_dilation': False,
9495
'pretrained': True,
96+
'mae_pretrained': False,
9597
'pretrained_weight': 'imagenet1k_v1',
9698
'max_stride': 128,
9799
'out_stride': [8, 16, 32, 64, 128],
@@ -174,6 +176,7 @@
174176
'backbone_norm': 'FrozeBN',
175177
'res5_dilation': False,
176178
'pretrained': True,
179+
'mae_pretrained': False,
177180
'pretrained_weight': 'imagenet1k_v1',
178181
'max_stride': 128,
179182
'out_stride': [8, 16, 32, 64, 128],
@@ -256,6 +259,7 @@
256259
'backbone_norm': 'FrozeBN',
257260
'res5_dilation': False,
258261
'pretrained': True,
262+
'mae_pretrained': False,
259263
'pretrained_weight': 'imagenet1k_v1',
260264
'max_stride': 128,
261265
'out_stride': [8, 16, 32, 64, 128],
@@ -338,6 +342,7 @@
338342
'backbone_norm': 'FrozeBN',
339343
'res5_dilation': False,
340344
'pretrained': True,
345+
'mae_pretrained': False,
341346
'pretrained_weight': 'imagenet1k_v1',
342347
'max_stride': 128,
343348
'out_stride': [8, 16, 32, 64, 128],
@@ -420,6 +425,7 @@
420425
'backbone_norm': 'FrozeBN',
421426
'res5_dilation': False,
422427
'pretrained': True,
428+
'mae_pretrained': False,
423429
'pretrained_weight': 'imagenet1k_v1',
424430
'max_stride': 128,
425431
'out_stride': [8, 16, 32, 64, 128],

config/yolof_config.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
'backbone_norm': 'FrozeBN',
1212
'res5_dilation': False,
1313
'pretrained': True,
14+
'mae_pretrained': False,
1415
'pretrained_weight': 'imagenet1k_v1',
1516
'max_stride': 32,
1617
'out_stride': 32,
@@ -93,6 +94,7 @@
9394
'backbone_norm': 'FrozeBN',
9495
'res5_dilation': False,
9596
'pretrained': True,
97+
'mae_pretrained': False,
9698
'pretrained_weight': 'imagenet1k_v1',
9799
'max_stride': 32,
98100
'out_stride': 32,
@@ -175,6 +177,7 @@
175177
'backbone_norm': 'FrozeBN',
176178
'res5_dilation': False,
177179
'pretrained': True,
180+
'mae_pretrained': False,
178181
'pretrained_weight': 'imagenet1k_v1',
179182
'max_stride': 32,
180183
'out_stride': 32,
@@ -258,6 +261,7 @@
258261
'backbone_norm': 'FrozeBN',
259262
'res5_dilation': True,
260263
'pretrained': True,
264+
'mae_pretrained': False,
261265
'pretrained_weight': 'imagenet1k_v1',
262266
'max_stride': 16,
263267
'out_stride': 16,
@@ -340,6 +344,7 @@
340344
'backbone_norm': 'FrozeBN',
341345
'res5_dilation': True,
342346
'pretrained': True,
347+
'mae_pretrained': False,
343348
'pretrained_weight': 'imagenet1k_v1',
344349
'max_stride': 16,
345350
'out_stride': 16,
@@ -425,6 +430,7 @@
425430
'backbone_norm': 'FrozeBN',
426431
'res5_dilation': False,
427432
'pretrained': True,
433+
'mae_pretrained': False,
428434
'pretrained_weight': 'imagenet1k_v2',
429435
'max_stride': 32,
430436
'out_stride': 32,
@@ -507,6 +513,7 @@
507513
'backbone_norm': 'FrozeBN',
508514
'res5_dilation': False,
509515
'pretrained': True,
516+
'mae_pretrained': False,
510517
'pretrained_weight': 'imagenet1k_v2',
511518
'max_stride': 32,
512519
'out_stride': 32,
@@ -590,6 +597,7 @@
590597
'backbone_norm': 'FrozeBN',
591598
'res5_dilation': True,
592599
'pretrained': True,
600+
'mae_pretrained': False,
593601
'pretrained_weight': 'imagenet1k_v2',
594602
'max_stride': 16,
595603
'out_stride': 16,
@@ -672,6 +680,7 @@
672680
'backbone_norm': 'FrozeBN',
673681
'res5_dilation': True,
674682
'pretrained': True,
683+
'mae_pretrained': False,
675684
'pretrained_weight': 'imagenet1k_v2',
676685
'max_stride': 16,
677686
'out_stride': 16,

0 commit comments

Comments
 (0)