Skip to content

Commit 93659a2

Browse files
committed
debug Plain-DETR
1 parent 33983a0 commit 93659a2

File tree

11 files changed

+120
-37
lines changed

11 files changed

+120
-37
lines changed

config/fcos_config.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
{'name': 'RandomHFlip'},
7979
{'name': 'RandomResize'},
8080
],
81+
'box_format': 'xyxy',
8182
'normalize_coords': False,
8283
},
8384

@@ -156,6 +157,7 @@
156157
{'name': 'RandomHFlip'},
157158
{'name': 'RandomResize'},
158159
],
160+
'box_format': 'xyxy',
159161
'normalize_coords': False,
160162
},
161163

@@ -234,6 +236,7 @@
234236
{'name': 'RandomHFlip'},
235237
{'name': 'RandomResize'},
236238
],
239+
'box_format': 'xyxy',
237240
'normalize_coords': False,
238241
},
239242

@@ -312,6 +315,7 @@
312315
{'name': 'RandomHFlip'},
313316
{'name': 'RandomResize'},
314317
],
318+
'box_format': 'xyxy',
315319
'normalize_coords': False,
316320
},
317321

@@ -390,6 +394,7 @@
390394
{'name': 'RandomHFlip'},
391395
{'name': 'RandomResize'},
392396
],
397+
'box_format': 'xyxy',
393398
'normalize_coords': False,
394399
},
395400

@@ -468,6 +473,7 @@
468473
{'name': 'RandomHFlip'},
469474
{'name': 'RandomResize'},
470475
],
476+
'box_format': 'xyxy',
471477
'normalize_coords': False,
472478
},
473479

@@ -547,6 +553,7 @@
547553
{'name': 'RandomHFlip'},
548554
{'name': 'RandomResize'},
549555
],
556+
'box_format': 'xyxy',
550557
'normalize_coords': False,
551558
},
552559

@@ -625,6 +632,7 @@
625632
{'name': 'RandomHFlip'},
626633
{'name': 'RandomResize'},
627634
],
635+
'box_format': 'xyxy',
628636
'normalize_coords': False,
629637
},
630638

@@ -703,6 +711,7 @@
703711
{'name': 'RandomHFlip'},
704712
{'name': 'RandomResize'},
705713
],
714+
'box_format': 'xyxy',
706715
'normalize_coords': False,
707716
},
708717

@@ -781,6 +790,7 @@
781790
{'name': 'RandomHFlip'},
782791
{'name': 'RandomResize'},
783792
],
793+
'box_format': 'xyxy',
784794
'normalize_coords': False,
785795
},
786796

config/plain_detr_config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
'en_ffn_dim': 2048,
2020
'en_dropout': 0.1,
2121
'en_act': 'gelu',
22+
'en_pre_norm': True,
2223
# Transformer Decoder
2324
'transformer': 'plain_detr_transformer',
2425
'de_num_heads': 8,
@@ -81,6 +82,7 @@
8182
## Transforms
8283
'detr_style': True,
8384
'trans_config': None,
85+
'box_format': 'xywh',
8486
'normalize_coords': False,
8587
},
8688

config/retinanet_config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
{'name': 'RandomHFlip'},
8383
{'name': 'RandomResize'},
8484
],
85+
'box_format': 'xyxy',
8586
'normalize_coords': False,
8687
},
8788

@@ -164,6 +165,7 @@
164165
{'name': 'RandomHFlip'},
165166
{'name': 'RandomResize'},
166167
],
168+
'box_format': 'xyxy',
167169
'normalize_coords': False,
168170
},
169171

@@ -246,6 +248,7 @@
246248
{'name': 'RandomHFlip'},
247249
{'name': 'RandomResize'},
248250
],
251+
'box_format': 'xyxy',
249252
'normalize_coords': False,
250253
},
251254

@@ -328,6 +331,7 @@
328331
{'name': 'RandomHFlip'},
329332
{'name': 'RandomResize'},
330333
],
334+
'box_format': 'xyxy',
331335
'normalize_coords': False,
332336
},
333337

@@ -410,6 +414,7 @@
410414
{'name': 'RandomHFlip'},
411415
{'name': 'RandomResize'},
412416
],
417+
'box_format': 'xyxy',
413418
'normalize_coords': False,
414419
},
415420

@@ -492,6 +497,7 @@
492497
{'name': 'RandomHFlip'},
493498
{'name': 'RandomResize'},
494499
],
500+
'box_format': 'xyxy',
495501
'normalize_coords': False,
496502
},
497503

config/yolof_config.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
{'name': 'RandomResize'},
8484
{'name': 'RandomShift', 'max_shift': 32},
8585
],
86+
'box_format': 'xyxy',
8687
'normalize_coords': False,
8788
},
8889

@@ -165,6 +166,7 @@
165166
{'name': 'RandomResize'},
166167
{'name': 'RandomShift', 'max_shift': 32},
167168
],
169+
'box_format': 'xyxy',
168170
'normalize_coords': False,
169171
},
170172

@@ -247,6 +249,7 @@
247249
{'name': 'RandomResize'},
248250
{'name': 'RandomShift', 'max_shift': 32},
249251
],
252+
'box_format': 'xyxy',
250253
'normalize_coords': False,
251254
},
252255

@@ -330,6 +333,7 @@
330333
{'name': 'RandomResize'},
331334
{'name': 'RandomShift', 'max_shift': 32},
332335
],
336+
'box_format': 'xyxy',
333337
'normalize_coords': False,
334338
},
335339

@@ -412,6 +416,7 @@
412416
{'name': 'RandomResize'},
413417
{'name': 'RandomShift', 'max_shift': 32},
414418
],
419+
'box_format': 'xyxy',
415420
'normalize_coords': False,
416421
},
417422

@@ -497,6 +502,7 @@
497502
{'name': 'RandomResize'},
498503
{'name': 'RandomShift', 'max_shift': 32},
499504
],
505+
'box_format': 'xyxy',
500506
'normalize_coords': False,
501507
},
502508

@@ -579,6 +585,7 @@
579585
{'name': 'RandomResize'},
580586
{'name': 'RandomShift', 'max_shift': 32},
581587
],
588+
'box_format': 'xyxy',
582589
'normalize_coords': False,
583590
},
584591

@@ -662,6 +669,7 @@
662669
{'name': 'RandomResize'},
663670
{'name': 'RandomShift', 'max_shift': 32},
664671
],
672+
'box_format': 'xyxy',
665673
'normalize_coords': False,
666674
},
667675

@@ -744,6 +752,7 @@
744752
{'name': 'RandomResize'},
745753
{'name': 'RandomShift', 'max_shift': 32},
746754
],
755+
'box_format': 'xyxy',
747756
'normalize_coords': False,
748757
},
749758

datasets/coco.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ def build_coco(args, transform=None, is_train=False):
133133
{'name': 'RandomHFlip'},
134134
{'name': 'RandomShift', 'max_shift': 100}
135135
],
136+
'box_format': 'xywh',
136137
'normalize_coords': False,
137138
}
138139

@@ -155,6 +156,10 @@ def build_coco(args, transform=None, is_train=False):
155156
if cfg['normalize_coords']:
156157
box[..., [0, 2]] *= orig_w
157158
box[..., [1, 3]] *= orig_h
159+
if cfg['box_format'] == 'xywh':
160+
box_x1y1 = box[..., :2] - box[..., 2:] * 0.5
161+
box_x2y2 = box[..., :2] + box[..., 2:] * 0.5
162+
box = torch.cat([box_x1y1, box_x2y2], dim=-1)
158163
# get box target
159164
x1, y1, x2, y2 = box.long()
160165
# get class label

datasets/transforms.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,26 @@ def __call__(self, image, target=None):
250250
target["boxes"] = boxes
251251
return image, target
252252

253+
class ConvertBoxFormat(object):
    """
    Transform that re-encodes ``target["boxes"]`` into the configured box format.

    Boxes are expected to arrive as corner coordinates (x1, y1, x2, y2) in the
    last dimension.

    Supported formats:
        "xyxy": boxes are passed through untouched.
        "xywh": boxes become (center_x, center_y, width, height).
    """
    # Formats this transform knows how to produce.
    _SUPPORTED_FORMATS = ("xyxy", "xywh")

    def __init__(self, box_format="xyxy"):
        self.box_format = box_format

    def __call__(self, image, target=None):
        """
        Input:
            image:  passed through unchanged.
            target: [dict | None] -> may hold a "boxes" tensor of shape [..., 4].
        Output:
            image:  the same object that was passed in.
            target: the input target for "xyxy" (or when target is None);
                    otherwise a shallow copy whose "boxes" entry is converted.
        Raises:
            NotImplementedError: if ``self.box_format`` is not supported.
        """
        # Validate first, so a mistyped format is reported even when no target
        # is supplied instead of being silently ignored.
        if self.box_format not in self._SUPPORTED_FORMATS:
            raise NotImplementedError("Unknown box format: {}".format(self.box_format))

        if self.box_format == "xyxy" or target is None:
            return image, target

        # Shallow copy: the caller's dict keeps its original "boxes" tensor.
        target = target.copy()
        if "boxes" in target:
            boxes_xyxy = target["boxes"]
            boxes_xywh = torch.zeros_like(boxes_xyxy)
            boxes_xywh[..., :2] = (boxes_xyxy[..., :2] + boxes_xyxy[..., 2:]) * 0.5  # cxcy
            boxes_xywh[..., 2:] = boxes_xyxy[..., 2:] - boxes_xyxy[..., :2]          # bwbh
            target["boxes"] = boxes_xywh

        return image, target
272+
253273
class Compose(object):
254274
def __init__(self, transforms):
255275
self.transforms = transforms
@@ -287,7 +307,8 @@ def build_transform(cfg=None, is_train=False):
287307
transforms.append(RandomShift(max_shift=t['max_shift']))
288308
transforms.extend([
289309
ToTensor(),
290-
Normalize(cfg['pixel_mean'], cfg['pixel_std'], cfg['normalize_coords'])
310+
Normalize(cfg['pixel_mean'], cfg['pixel_std'], cfg['normalize_coords']),
311+
ConvertBoxFormat(cfg['box_format'])
291312
])
292313
# build transform for DETR-style detector
293314
else:
@@ -302,15 +323,17 @@ def build_transform(cfg=None, is_train=False):
302323
])
303324
),
304325
ToTensor(),
305-
Normalize(cfg['pixel_mean'], cfg['pixel_std'], cfg['normalize_coords'])
326+
Normalize(cfg['pixel_mean'], cfg['pixel_std'], cfg['normalize_coords']),
327+
ConvertBoxFormat(cfg['box_format'])
306328
]
307329

308330
# ---------------- Transform for Evaluating ----------------
309331
else:
310332
transforms = [
311333
RandomResize([cfg['test_min_size']], max_size=cfg['test_max_size']),
312334
ToTensor(),
313-
Normalize(cfg['pixel_mean'], cfg['pixel_std'], cfg['normalize_coords'])
335+
Normalize(cfg['pixel_mean'], cfg['pixel_std'], cfg['normalize_coords']),
336+
ConvertBoxFormat(cfg['box_format'])
314337
]
315338

316339
return Compose(transforms)

engine.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def train_one_epoch(cfg,
5454

5555
# Visualize train targets
5656
if vis_target:
57-
vis_data(images, targets, masks, class_labels, cfg['normalize_coords'])
57+
vis_data(images, targets, masks, class_labels, cfg['normalize_coords'], cfg['box_format'])
5858

5959
# Inference
6060
outputs = model(images, masks)

models/basic/transformer.py

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,12 @@ def build_transformer(cfg, return_intermediate=False):
7272
# ----------------- Transformer Encoder modules -----------------
7373
class TransformerEncoderLayer(nn.Module):
7474
def __init__(self,
75-
d_model :int = 256,
76-
num_heads :int = 8,
77-
ffn_dim :int = 1024,
78-
dropout :float = 0.1,
79-
act_type :str = "relu",
75+
d_model :int = 256,
76+
num_heads :int = 8,
77+
ffn_dim :int = 1024,
78+
dropout :float = 0.1,
79+
act_type :str = "relu",
80+
pre_norm :bool = False,
8081
):
8182
super().__init__()
8283
# ----------- Basic parameters -----------
@@ -85,6 +86,7 @@ def __init__(self,
8586
self.ffn_dim = ffn_dim
8687
self.dropout = dropout
8788
self.act_type = act_type
89+
self.pre_norm = pre_norm
8890
# ----------- Basic parameters -----------
8991
# Multi-head Self-Attn
9092
self.self_attn = nn.MultiheadAttention(d_model, num_heads, dropout=dropout, batch_first=True)
@@ -97,7 +99,27 @@ def __init__(self,
9799
def with_pos_embed(self, tensor, pos):
98100
return tensor if pos is None else tensor + pos
99101

100-
def forward(self, src, pos_embed):
102+
def forward_pre_norm(self, src, pos_embed):
    """
    Encoder-layer pass that applies ``self.norm`` before self-attention.

    Input:
        src:       [torch.Tensor] -> [B, N, C]
        pos_embed: [torch.Tensor] -> [B, N, C]
    Output:
        src:       [torch.Tensor] -> [B, N, C]
    """
    # Normalize up front; every later step works on the normalized tokens.
    normed = self.norm(src)

    # -------------- MHSA --------------
    # Positional information is added to queries/keys only, not to the values.
    query = key = self.with_pos_embed(normed, pos_embed)
    attn_out = self.self_attn(query, key, value=normed)[0]
    # NOTE(review): the skip connection starts from the normalized tensor,
    # not from the raw input -- confirm this is the intended pre-norm design.
    hidden = normed + self.dropout(attn_out)

    # -------------- FFN --------------
    return self.ffn(hidden)
121+
122+
def forward_post_norm(self, src, pos_embed):
101123
"""
102124
Input:
103125
src: [torch.Tensor] -> [B, N, C]
@@ -117,15 +139,22 @@ def forward(self, src, pos_embed):
117139

118140
return src
119141

142+
def forward(self, src, pos_embed):
    """
    Route the input through the pre-norm or post-norm variant of the layer,
    depending on ``self.pre_norm``.

    Input:
        src:       [torch.Tensor] -> [B, N, C]
        pos_embed: [torch.Tensor] -> [B, N, C]
    Output:
        src:       [torch.Tensor] -> [B, N, C]
    """
    # Only the selected variant is looked up and executed.
    layer_fn = self.forward_pre_norm if self.pre_norm else self.forward_post_norm
    return layer_fn(src, pos_embed)
147+
120148
class TransformerEncoder(nn.Module):
121149
def __init__(self,
122150
d_model :int = 256,
123151
num_heads :int = 8,
124152
num_layers :int = 1,
125153
ffn_dim :int = 1024,
126-
pe_temperature : float = 10000.,
154+
pe_temperature :float = 10000.,
127155
dropout :float = 0.1,
128156
act_type :str = "relu",
157+
pre_norm :bool = False,
129158
):
130159
super().__init__()
131160
# ----------- Basic parameters -----------
@@ -135,11 +164,12 @@ def __init__(self,
135164
self.ffn_dim = ffn_dim
136165
self.dropout = dropout
137166
self.act_type = act_type
167+
self.pre_norm = pre_norm
138168
self.pe_temperature = pe_temperature
139169
self.pos_embed = None
140170
# ----------- Basic parameters -----------
141171
self.encoder_layers = get_clones(
142-
TransformerEncoderLayer(d_model, num_heads, ffn_dim, dropout, act_type), num_layers)
172+
TransformerEncoderLayer(d_model, num_heads, ffn_dim, dropout, act_type, pre_norm), num_layers)
143173

144174
def build_2d_sincos_position_embedding(self, device, w, h, embed_dim=256, temperature=10000.):
145175
assert embed_dim % 4 == 0, \

0 commit comments

Comments
 (0)