Skip to content

Commit 94569c4

Browse files
committed
update RT-DETR
1 parent 24bf87c commit 94569c4

File tree

12 files changed

+279
-120
lines changed

12 files changed

+279
-120
lines changed

config/fcos_config.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
'backbone_norm': 'FrozeBN',
1111
'res5_dilation': False,
1212
'pretrained': True,
13+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
1314
'pretrained_weight': 'imagenet1k_v1',
1415
'max_stride': 128,
1516
'out_stride': [8, 16, 32, 64, 128],
@@ -89,6 +90,7 @@
8990
'backbone_norm': 'FrozeBN',
9091
'res5_dilation': False,
9192
'pretrained': True,
93+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
9294
'pretrained_weight': 'imagenet1k_v1',
9395
'max_stride': 128,
9496
'out_stride': [8, 16, 32, 64, 128],
@@ -168,6 +170,7 @@
168170
'backbone_norm': 'FrozeBN',
169171
'res5_dilation': False,
170172
'pretrained': True,
173+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
171174
'pretrained_weight': 'imagenet1k_v1',
172175
'max_stride': 128,
173176
'out_stride': [8, 16, 32, 64, 128],
@@ -247,6 +250,7 @@
247250
'backbone_norm': 'FrozeBN',
248251
'res5_dilation': False,
249252
'pretrained': True,
253+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
250254
'pretrained_weight': 'imagenet1k_v1',
251255
'max_stride': 128,
252256
'out_stride': [8, 16, 32, 64, 128],
@@ -326,6 +330,7 @@
326330
'backbone_norm': 'FrozeBN',
327331
'res5_dilation': False,
328332
'pretrained': True,
333+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
329334
'pretrained_weight': 'imagenet1k_v1',
330335
'max_stride': 128,
331336
'out_stride': [8, 16, 32, 64, 128],
@@ -405,6 +410,7 @@
405410
'backbone_norm': 'FrozeBN',
406411
'res5_dilation': False,
407412
'pretrained': True,
413+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
408414
'pretrained_weight': 'imagenet1k_v1',
409415
'max_stride': 128,
410416
'out_stride': [8, 16, 32, 64, 128],
@@ -485,6 +491,7 @@
485491
'backbone_norm': 'FrozeBN',
486492
'res5_dilation': False,
487493
'pretrained': True,
494+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
488495
'pretrained_weight': 'imagenet1k_v2',
489496
'max_stride': 128,
490497
'out_stride': [8, 16, 32, 64, 128],
@@ -564,6 +571,7 @@
564571
'backbone_norm': 'FrozeBN',
565572
'res5_dilation': False,
566573
'pretrained': True,
574+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
567575
'pretrained_weight': 'imagenet1k_v2',
568576
'max_stride': 128,
569577
'out_stride': [8, 16, 32, 64, 128],
@@ -643,6 +651,7 @@
643651
'backbone_norm': 'FrozeBN',
644652
'res5_dilation': False,
645653
'pretrained': True,
654+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
646655
'pretrained_weight': 'imagenet1k_v2',
647656
'max_stride': 128,
648657
'out_stride': [8, 16, 32, 64, 128],
@@ -722,6 +731,7 @@
722731
'backbone_norm': 'FrozeBN',
723732
'res5_dilation': False,
724733
'pretrained': True,
734+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
725735
'pretrained_weight': 'imagenet1k_v2',
726736
'max_stride': 128,
727737
'out_stride': [8, 16, 32, 64, 128],

config/plain_detr_config.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
'res5_dilation': False,
1111
'pretrained': True,
1212
'pretrained_weight': 'spark_resnet50', # Cls: imagenet1k_v2; MIM: spark_resnet50
13+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
1314
'max_stride': 32,
1415
'out_stride': 16,
1516
# Transformer Encoder
@@ -36,11 +37,11 @@
3637
'num_queries_one2many': 1500,
3738
# Post process
3839
'train_topk': 300,
39-
'train_conf_thresh': 0.05,
40-
'train_nms_thresh': 0.6,
40+
'train_conf_thresh': 0.001,
41+
'train_nms_thresh': 0.5,
4142
'test_topk': 300,
42-
'test_conf_thresh': 0.3,
43-
'test_nms_thresh': 0.45,
43+
'test_conf_thresh': 0.001,
44+
'test_nms_thresh': 0.5,
4445
'nms_class_agnostic': True, # We prefer to use class-agnostic NMS in the demo.
4546
# ---------------- Assignment config ----------------
4647
'matcher_hpy': {'cost_class': 2.0,

config/retinanet_config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
'res5_dilation': False,
1212
'pretrained': True,
1313
'pretrained_weight': 'imagenet1k_v1',
14+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
1415
'max_stride': 128,
1516
'out_stride': [8, 16, 32, 64, 128],
1617
## Neck
@@ -94,6 +95,7 @@
9495
'res5_dilation': False,
9596
'pretrained': True,
9697
'pretrained_weight': 'imagenet1k_v1',
98+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
9799
'max_stride': 128,
98100
'out_stride': [8, 16, 32, 64, 128],
99101
## Neck
@@ -177,6 +179,7 @@
177179
'res5_dilation': False,
178180
'pretrained': True,
179181
'pretrained_weight': 'imagenet1k_v1',
182+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
180183
'max_stride': 128,
181184
'out_stride': [8, 16, 32, 64, 128],
182185
## Neck
@@ -260,6 +263,7 @@
260263
'res5_dilation': False,
261264
'pretrained': True,
262265
'pretrained_weight': 'imagenet1k_v1',
266+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
263267
'max_stride': 128,
264268
'out_stride': [8, 16, 32, 64, 128],
265269
## Neck
@@ -343,6 +347,7 @@
343347
'res5_dilation': False,
344348
'pretrained': True,
345349
'pretrained_weight': 'imagenet1k_v1',
350+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
346351
'max_stride': 128,
347352
'out_stride': [8, 16, 32, 64, 128],
348353
## Neck
@@ -426,6 +431,7 @@
426431
'res5_dilation': False,
427432
'pretrained': True,
428433
'pretrained_weight': 'imagenet1k_v1',
434+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
429435
'max_stride': 128,
430436
'out_stride': [8, 16, 32, 64, 128],
431437
## Neck

config/rtdetr_config.py

Lines changed: 116 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66
## Model scale
77
# Backbone
88
'backbone': 'resnet18',
9-
'backbone_norm': 'FrozeBN',
9+
'backbone_norm': 'BN',
1010
'res5_dilation': False,
1111
'pretrained': True,
1212
'pretrained_weight': 'imagenet1k_v1',
13+
'freeze_at': -1, # do not freeze any layer of the backbone
1314
'out_stride': [8, 16, 32],
1415
'max_stride': 32,
1516
'hidden_dim': 256,
@@ -18,7 +19,7 @@
1819
'fpn_num_blocks': 3,
1920
'fpn_expansion': 0.5,
2021
'fpn_act': 'relu',
21-
'fpn_norm': 'GN',
22+
'fpn_norm': 'BN',
2223
'fpn_depthwise': False,
2324
'en_num_heads': 8,
2425
'en_num_layers': 1,
@@ -44,18 +45,126 @@
4445
'dn_box_noise_scale': 1,
4546
# Post process
4647
'train_topk': 300,
47-
'train_conf_thresh': 0.05,
48-
'train_nms_thresh': 0.6,
48+
'train_conf_thresh': 0.001,
49+
'train_nms_thresh': 0.5,
50+
'test_topk': 300,
51+
'test_conf_thresh': 0.001,
52+
'test_nms_thresh': 0.5,
53+
'nms_class_agnostic': True, # We prefer to use class-agnostic NMS in the demo.
54+
# ---------------- Assignment config ----------------
55+
'matcher_hpy': {'cost_class': 2.0,
56+
'cost_bbox': 5.0,
57+
'cost_giou': 2.0,},
58+
# ---------------- Loss config ----------------
59+
'loss_coeff': {'class': 1.0,
60+
'bbox': 5.0,
61+
'giou': 2.0,},
62+
# ----------------- Training -----------------
63+
## Optimizer
64+
'optimizer': 'adamw',
65+
'base_lr': 0.0001 / 16,
66+
'backbone_lr_ratio': 0.1,
67+
'momentum': None,
68+
'weight_decay': 0.0001,
69+
'clip_max_norm': 0.1,
70+
'lr_backbone_names': ['backbone',],
71+
'lr_linear_proj_names': ["reference_points", "sampling_offsets",], # These two names are not required by PlainDETR
72+
'lr_linear_proj_mult': 0.1,
73+
'wd_norm_names': ["norm", "bias", "level_embed",],
74+
'wd_norm_mult': 0.0,
75+
## LR Scheduler
76+
'lr_scheduler': 'step',
77+
'warmup': 'linear',
78+
'warmup_iters': 2000,
79+
'warmup_factor': 0.00066667,
80+
## Model EMA
81+
'use_ema': True,
82+
'ema_decay': 0.9999,
83+
'ema_tau': 2000,
84+
## Training scheduler
85+
'scheduler': '6x',
86+
'max_epoch': 72, # 6x
87+
'lr_epoch': [66], # 6x
88+
# ----------------- Input -----------------
89+
## Transforms
90+
'train_min_size': [[640, 640]], # short edge of image
91+
'train_max_size': 640,
92+
'test_min_size': [[640, 640]],
93+
'test_max_size': 640,
94+
'random_size': [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800],
95+
## Pixel mean & std
96+
'pixel_mean': [0.485, 0.456, 0.406],
97+
'pixel_std': [0.229, 0.224, 0.225],
98+
## Transforms
99+
'detr_style': False,
100+
'trans_config': [
101+
{'name': 'RandomPhotometricDistort', 'prob': 0.5},
102+
{'name': 'RandomZoomOut', 'fill': [123.675, 116.28, 103.53]},
103+
{'name': 'RandomIoUCrop', 'prob': 0.8},
104+
{'name': 'RandomHFlip'},
105+
{'name': 'RandomResize'},
106+
{'name': 'RefineBBox', 'min_box_size': 1},
107+
],
108+
'box_format': 'xywh',
109+
'normalize_coords': True,
110+
},
111+
112+
'rtdetr_r50':{
113+
# ---------------- Model config ----------------
114+
## Model scale
115+
# Backbone
116+
'backbone': 'resnet50',
117+
'backbone_norm': 'FrozeBN',
118+
'res5_dilation': False,
119+
'pretrained': True,
120+
'pretrained_weight': 'imagenet1k_v2',
121+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
122+
'out_stride': [8, 16, 32],
123+
'max_stride': 32,
124+
'hidden_dim': 256,
125+
# Transformer Ecndoer
126+
'neck': 'hybrid_encoder',
127+
'fpn_num_blocks': 3,
128+
'fpn_expansion': 1.0,
129+
'fpn_act': 'relu',
130+
'fpn_norm': 'BN',
131+
'fpn_depthwise': False,
132+
'en_num_heads': 8,
133+
'en_num_layers': 1,
134+
'en_ffn_dim': 1024,
135+
'en_dropout': 0.0,
136+
'pe_temperature': 10000.,
137+
'en_act': 'gelu',
138+
'en_pre_norm': False,
139+
# Transformer Decoder
140+
'transformer': 'rtdetr_transformer',
141+
'de_num_heads': 8,
142+
'de_num_layers': 6,
143+
'de_ffn_dim': 1024,
144+
'de_dropout': 0.0,
145+
'de_act': 'relu',
146+
'de_pre_norm': False,
147+
'de_num_points': 4,
148+
'num_queries': 300,
149+
'learnt_init_query': False,
150+
'pe_temperature': 10000.,
151+
'dn_num_denoising': 100,
152+
'dn_label_noise_ratio': 0.5,
153+
'dn_box_noise_scale': 1,
154+
# Post process
155+
'train_topk': 300,
156+
'train_conf_thresh': 0.001,
157+
'train_nms_thresh': 0.5,
49158
'test_topk': 300,
50-
'test_conf_thresh': 0.3,
51-
'test_nms_thresh': 0.45,
159+
'test_conf_thresh': 0.001,
160+
'test_nms_thresh': 0.5,
52161
'nms_class_agnostic': True, # We prefer to use class-agnostic NMS in the demo.
53162
# ---------------- Assignment config ----------------
54163
'matcher_hpy': {'cost_class': 2.0,
55164
'cost_bbox': 5.0,
56165
'cost_giou': 2.0,},
57166
# ---------------- Loss config ----------------
58-
'loss_coeff': {'class': 2.0,
167+
'loss_coeff': {'class': 1.0,
59168
'bbox': 5.0,
60169
'giou': 2.0,},
61170
# ----------------- Training -----------------

config/yolof_config.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
'res5_dilation': False,
1313
'pretrained': True,
1414
'pretrained_weight': 'imagenet1k_v1',
15+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
1516
'max_stride': 32,
1617
'out_stride': 32,
1718
## Neck
@@ -95,6 +96,7 @@
9596
'res5_dilation': False,
9697
'pretrained': True,
9798
'pretrained_weight': 'imagenet1k_v1',
99+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
98100
'max_stride': 32,
99101
'out_stride': 32,
100102
## Neck
@@ -178,6 +180,7 @@
178180
'res5_dilation': False,
179181
'pretrained': True,
180182
'pretrained_weight': 'imagenet1k_v1',
183+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
181184
'max_stride': 32,
182185
'out_stride': 32,
183186
## Neck
@@ -262,6 +265,7 @@
262265
'res5_dilation': True,
263266
'pretrained': True,
264267
'pretrained_weight': 'imagenet1k_v1',
268+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
265269
'max_stride': 16,
266270
'out_stride': 16,
267271
## Neck
@@ -345,6 +349,7 @@
345349
'res5_dilation': True,
346350
'pretrained': True,
347351
'pretrained_weight': 'imagenet1k_v1',
352+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
348353
'max_stride': 16,
349354
'out_stride': 16,
350355
## Neck
@@ -431,6 +436,7 @@
431436
'res5_dilation': False,
432437
'pretrained': True,
433438
'pretrained_weight': 'imagenet1k_v2',
439+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
434440
'max_stride': 32,
435441
'out_stride': 32,
436442
## Neck
@@ -514,6 +520,7 @@
514520
'res5_dilation': False,
515521
'pretrained': True,
516522
'pretrained_weight': 'imagenet1k_v2',
523+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
517524
'max_stride': 32,
518525
'out_stride': 32,
519526
## Neck
@@ -598,6 +605,7 @@
598605
'res5_dilation': True,
599606
'pretrained': True,
600607
'pretrained_weight': 'imagenet1k_v2',
608+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
601609
'max_stride': 16,
602610
'out_stride': 16,
603611
## Neck
@@ -681,6 +689,7 @@
681689
'res5_dilation': True,
682690
'pretrained': True,
683691
'pretrained_weight': 'imagenet1k_v2',
692+
'freeze_at': 1, # freeze stem layer + layer1 of the backbone
684693
'max_stride': 16,
685694
'out_stride': 16,
686695
## Neck

0 commit comments

Comments
 (0)