added pure computation inference time

vkhalidov · facebook-github-bot · commit dfdaae194ab5 · 2019-10-15T04:16:22.000-07:00
Summary:
It's more useful to report pure computation time as the inference time of a model, rather than the total inference time, which also includes data loader and evaluator operations. The results tend to be more stable and better reflect the model's own performance.

This adds a sync point to the inference code. It should incur little to no overhead, however, since synchronization would most likely happen during `evaluator.process(inputs, outputs)` on the next line anyway.

Reviewed By: ppwwyyxx

Differential Revision: D17905849

fbshipit-source-id: ded18415f14b11c8d6aab3cd333cba7eb1126ed8
diff --git a/detectron2/evaluation/evaluator.py b/detectron2/evaluation/evaluator.py
@@ -107,12 +107,17 @@ def inference_on_dataset(model, data_loader, evaluator):
     logging_interval = 50
     num_warmup = min(5, logging_interval - 1, total - 1)
     start_time = time.time()
+    total_compute_time = 0
     with inference_context(model), torch.no_grad():
         for idx, inputs in enumerate(data_loader):
             if idx == num_warmup:
                 start_time = time.time()
+                total_compute_time = 0
 
+            start_compute_time = time.time()
             outputs = model(inputs)
+            torch.cuda.synchronize()
+            total_compute_time += time.time() - start_compute_time
             evaluator.process(inputs, outputs)
 
             if (idx + 1) % logging_interval == 0:
@@ -136,6 +141,12 @@ def inference_on_dataset(model, data_loader, evaluator):
             total_time_str, total_time / (total - num_warmup), num_devices
         )
     )
+    total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time)))
+    logger.info(
+        "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format(
+            total_compute_time_str, total_compute_time / (total - num_warmup), num_devices
+        )
+    )
 
     results = evaluator.evaluate()
     # An evaluator may return None when not in main process.