|
6 | 6 | ## Model scale
|
7 | 7 | # Backbone
|
8 | 8 | 'backbone': 'resnet18',
|
9 |
| - 'backbone_norm': 'FrozeBN', |
| 9 | + 'backbone_norm': 'BN', |
10 | 10 | 'res5_dilation': False,
|
11 | 11 | 'pretrained': True,
|
12 | 12 | 'pretrained_weight': 'imagenet1k_v1',
|
| 13 | + 'freeze_at': -1, # do not freeze any layer of the backbone |
13 | 14 | 'out_stride': [8, 16, 32],
|
14 | 15 | 'max_stride': 32,
|
15 | 16 | 'hidden_dim': 256,
|
|
18 | 19 | 'fpn_num_blocks': 3,
|
19 | 20 | 'fpn_expansion': 0.5,
|
20 | 21 | 'fpn_act': 'relu',
|
21 |
| - 'fpn_norm': 'GN', |
| 22 | + 'fpn_norm': 'BN', |
22 | 23 | 'fpn_depthwise': False,
|
23 | 24 | 'en_num_heads': 8,
|
24 | 25 | 'en_num_layers': 1,
|
|
44 | 45 | 'dn_box_noise_scale': 1,
|
45 | 46 | # Post process
|
46 | 47 | 'train_topk': 300,
|
47 |
| - 'train_conf_thresh': 0.05, |
48 |
| - 'train_nms_thresh': 0.6, |
| 48 | + 'train_conf_thresh': 0.001, |
| 49 | + 'train_nms_thresh': 0.5, |
| 50 | + 'test_topk': 300, |
| 51 | + 'test_conf_thresh': 0.001, |
| 52 | + 'test_nms_thresh': 0.5, |
| 53 | + 'nms_class_agnostic': True, # We prefer to use class-agnostic NMS in the demo. |
| 54 | + # ---------------- Assignment config ---------------- |
| 55 | + 'matcher_hpy': {'cost_class': 2.0, |
| 56 | + 'cost_bbox': 5.0, |
| 57 | + 'cost_giou': 2.0,}, |
| 58 | + # ---------------- Loss config ---------------- |
| 59 | + 'loss_coeff': {'class': 1.0, |
| 60 | + 'bbox': 5.0, |
| 61 | + 'giou': 2.0,}, |
| 62 | + # ----------------- Training ----------------- |
| 63 | + ## Optimizer |
| 64 | + 'optimizer': 'adamw', |
| 65 | + 'base_lr': 0.0001 / 16, |
| 66 | + 'backbone_lr_ratio': 0.1, |
| 67 | + 'momentum': None, |
| 68 | + 'weight_decay': 0.0001, |
| 69 | + 'clip_max_norm': 0.1, |
| 70 | + 'lr_backbone_names': ['backbone',], |
| 71 | + 'lr_linear_proj_names': ["reference_points", "sampling_offsets",], # These two names are not required by PlainDETR |
| 72 | + 'lr_linear_proj_mult': 0.1, |
| 73 | + 'wd_norm_names': ["norm", "bias", "level_embed",], |
| 74 | + 'wd_norm_mult': 0.0, |
| 75 | + ## LR Scheduler |
| 76 | + 'lr_scheduler': 'step', |
| 77 | + 'warmup': 'linear', |
| 78 | + 'warmup_iters': 2000, |
| 79 | + 'warmup_factor': 0.00066667, |
| 80 | + ## Model EMA |
| 81 | + 'use_ema': True, |
| 82 | + 'ema_decay': 0.9999, |
| 83 | + 'ema_tau': 2000, |
| 84 | + ## Training scheduler |
| 85 | + 'scheduler': '6x', |
| 86 | + 'max_epoch': 72, # 6x |
| 87 | + 'lr_epoch': [66], # 6x |
| 88 | + # ----------------- Input ----------------- |
| 89 | + ## Transforms |
| 90 | + 'train_min_size': [[640, 640]], # short edge of image |
| 91 | + 'train_max_size': 640, |
| 92 | + 'test_min_size': [[640, 640]], |
| 93 | + 'test_max_size': 640, |
| 94 | + 'random_size': [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800], |
| 95 | + ## Pixel mean & std |
| 96 | + 'pixel_mean': [0.485, 0.456, 0.406], |
| 97 | + 'pixel_std': [0.229, 0.224, 0.225], |
| 98 | + ## Transforms |
| 99 | + 'detr_style': False, |
| 100 | + 'trans_config': [ |
| 101 | + {'name': 'RandomPhotometricDistort', 'prob': 0.5}, |
| 102 | + {'name': 'RandomZoomOut', 'fill': [123.675, 116.28, 103.53]}, |
| 103 | + {'name': 'RandomIoUCrop', 'prob': 0.8}, |
| 104 | + {'name': 'RandomHFlip'}, |
| 105 | + {'name': 'RandomResize'}, |
| 106 | + {'name': 'RefineBBox', 'min_box_size': 1}, |
| 107 | + ], |
| 108 | + 'box_format': 'xywh', |
| 109 | + 'normalize_coords': True, |
| 110 | + }, |
| 111 | + |
| 112 | + 'rtdetr_r50':{ |
| 113 | + # ---------------- Model config ---------------- |
| 114 | + ## Model scale |
| 115 | + # Backbone |
| 116 | + 'backbone': 'resnet50', |
| 117 | + 'backbone_norm': 'FrozeBN', |
| 118 | + 'res5_dilation': False, |
| 119 | + 'pretrained': True, |
| 120 | + 'pretrained_weight': 'imagenet1k_v2', |
| 121 | + 'freeze_at': 1, # freeze stem layer + layer1 of the backbone |
| 122 | + 'out_stride': [8, 16, 32], |
| 123 | + 'max_stride': 32, |
| 124 | + 'hidden_dim': 256, |
| 125 | + # Transformer Encoder |
| 126 | + 'neck': 'hybrid_encoder', |
| 127 | + 'fpn_num_blocks': 3, |
| 128 | + 'fpn_expansion': 1.0, |
| 129 | + 'fpn_act': 'relu', |
| 130 | + 'fpn_norm': 'BN', |
| 131 | + 'fpn_depthwise': False, |
| 132 | + 'en_num_heads': 8, |
| 133 | + 'en_num_layers': 1, |
| 134 | + 'en_ffn_dim': 1024, |
| 135 | + 'en_dropout': 0.0, |
| 136 | + 'pe_temperature': 10000., |
| 137 | + 'en_act': 'gelu', |
| 138 | + 'en_pre_norm': False, |
| 139 | + # Transformer Decoder |
| 140 | + 'transformer': 'rtdetr_transformer', |
| 141 | + 'de_num_heads': 8, |
| 142 | + 'de_num_layers': 6, |
| 143 | + 'de_ffn_dim': 1024, |
| 144 | + 'de_dropout': 0.0, |
| 145 | + 'de_act': 'relu', |
| 146 | + 'de_pre_norm': False, |
| 147 | + 'de_num_points': 4, |
| 148 | + 'num_queries': 300, |
| 149 | + 'learnt_init_query': False, |
| 150 | + 'pe_temperature': 10000., |
| 151 | + 'dn_num_denoising': 100, |
| 152 | + 'dn_label_noise_ratio': 0.5, |
| 153 | + 'dn_box_noise_scale': 1, |
| 154 | + # Post process |
| 155 | + 'train_topk': 300, |
| 156 | + 'train_conf_thresh': 0.001, |
| 157 | + 'train_nms_thresh': 0.5, |
49 | 158 | 'test_topk': 300,
|
50 |
| - 'test_conf_thresh': 0.3, |
51 |
| - 'test_nms_thresh': 0.45, |
| 159 | + 'test_conf_thresh': 0.001, |
| 160 | + 'test_nms_thresh': 0.5, |
52 | 161 | 'nms_class_agnostic': True, # We prefer to use class-agnostic NMS in the demo.
|
53 | 162 | # ---------------- Assignment config ----------------
|
54 | 163 | 'matcher_hpy': {'cost_class': 2.0,
|
55 | 164 | 'cost_bbox': 5.0,
|
56 | 165 | 'cost_giou': 2.0,},
|
57 | 166 | # ---------------- Loss config ----------------
|
58 |
| - 'loss_coeff': {'class': 2.0, |
| 167 | + 'loss_coeff': {'class': 1.0, |
59 | 168 | 'bbox': 5.0,
|
60 | 169 | 'giou': 2.0,},
|
61 | 170 | # ----------------- Training -----------------
|
|
0 commit comments