Skip to content

Commit fd006c6

Browse files
authored
Merge pull request #13 from paperswithcode/inference_speed
Inference speed
2 parents 8ea8295 + 01132da commit fd006c6

File tree

16 files changed: +76 lines, -25 lines

16 files changed: +76 lines, -25 lines

torchbench/image_classification/cifar10.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def benchmark(
6464
num_workers=num_workers,
6565
pin_memory=True,
6666
)
67-
test_results, run_hash = evaluate_classification(
67+
test_results, speed_mem_metrics, run_hash = evaluate_classification(
6868
model=model,
6969
test_loader=test_loader,
7070
model_output_transform=model_output_transform,
@@ -84,6 +84,7 @@ def benchmark(
8484
config=config,
8585
dataset=cls.dataset.__name__,
8686
results=test_results,
87+
speed_mem_metrics=speed_mem_metrics,
8788
pytorch_hub_id=pytorch_hub_url,
8889
model=paper_model_name,
8990
model_description=model_description,

torchbench/image_classification/cifar100.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def benchmark(
6464
num_workers=num_workers,
6565
pin_memory=True,
6666
)
67-
test_results, run_hash = evaluate_classification(
67+
test_results, speed_mem_metrics, run_hash = evaluate_classification(
6868
model=model,
6969
test_loader=test_loader,
7070
model_output_transform=model_output_transform,
@@ -84,6 +84,7 @@ def benchmark(
8484
config=config,
8585
dataset=cls.dataset.__name__,
8686
results=test_results,
87+
speed_mem_metrics=speed_mem_metrics,
8788
pytorch_hub_id=pytorch_hub_url,
8889
model=paper_model_name,
8990
model_description=model_description,

torchbench/image_classification/imagenet.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ def benchmark(
200200
num_workers=num_workers,
201201
pin_memory=pin_memory,
202202
)
203-
test_results, run_hash = evaluate_classification(
203+
test_results, speed_mem_metrics, run_hash = evaluate_classification(
204204
model=model,
205205
test_loader=test_loader,
206206
model_output_transform=model_output_transform,
@@ -220,6 +220,7 @@ def benchmark(
220220
config=config,
221221
dataset=cls.dataset.__name__,
222222
results=test_results,
223+
speed_mem_metrics=speed_mem_metrics,
223224
pytorch_hub_id=pytorch_hub_url,
224225
model=paper_model_name,
225226
model_description=model_description,

torchbench/image_classification/mnist.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def benchmark(
6363
num_workers=num_workers,
6464
pin_memory=True,
6565
)
66-
test_results, run_hash = evaluate_classification(
66+
test_results, speed_mem_metrics, run_hash = evaluate_classification(
6767
model=model,
6868
test_loader=test_loader,
6969
model_output_transform=model_output_transform,
@@ -83,6 +83,7 @@ def benchmark(
8383
config=config,
8484
dataset=cls.dataset.__name__,
8585
results=test_results,
86+
speed_mem_metrics=speed_mem_metrics,
8687
pytorch_hub_id=pytorch_hub_url,
8788
model=paper_model_name,
8889
model_description=model_description,

torchbench/image_classification/stl10.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def benchmark(
6363
num_workers=num_workers,
6464
pin_memory=True,
6565
)
66-
test_results, run_hash = evaluate_classification(
66+
test_results, speed_mem_metrics, run_hash = evaluate_classification(
6767
model=model,
6868
test_loader=test_loader,
6969
model_output_transform=model_output_transform,
@@ -83,6 +83,7 @@ def benchmark(
8383
config=config,
8484
dataset=cls.dataset.__name__,
8585
results=test_results,
86+
speed_mem_metrics=speed_mem_metrics,
8687
pytorch_hub_id=pytorch_hub_url,
8788
model=paper_model_name,
8889
model_description=model_description,

torchbench/image_classification/svhn.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def benchmark(
6666
num_workers=num_workers,
6767
pin_memory=True,
6868
)
69-
test_results, run_hash = evaluate_classification(
69+
test_results, speed_mem_metrics, run_hash = evaluate_classification(
7070
model=model,
7171
test_loader=test_loader,
7272
model_output_transform=model_output_transform,
@@ -86,6 +86,7 @@ def benchmark(
8686
config=config,
8787
dataset=cls.dataset.__name__,
8888
results=test_results,
89+
speed_mem_metrics=speed_mem_metrics,
8990
pytorch_hub_id=pytorch_hub_url,
9091
model=paper_model_name,
9192
model_description=model_description,

torchbench/image_classification/utils.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import time
22

3+
import numpy as np
34
import tqdm
45
import torch
56
import torchvision
@@ -16,31 +17,31 @@ def evaluate_classification(
1617
send_data_to_device,
1718
device="cuda",
1819
):
19-
batch_time = AverageMeter()
20+
inference_time = AverageMeter()
2021
top1 = AverageMeter()
2122
top5 = AverageMeter()
23+
iterator = tqdm.tqdm(test_loader, desc="Evaluation", mininterval=5)
2224

2325
end = time.time()
2426

25-
iterator = tqdm.tqdm(test_loader, desc="Evaluation", mininterval=5)
26-
2727
with torch.no_grad():
2828
for i, (input, target) in enumerate(iterator):
2929

3030
input, target = send_data_to_device(input, target, device=device)
3131
output = model(input)
3232

33+
inference_time.update(time.time() - end)
34+
3335
if model_output_transform is not None:
3436
output = model_output_transform(output, target, model=model)
3537

3638
check_metric_inputs(output, target, test_loader.dataset, i)
3739
prec1, prec5 = accuracy(output, target, topk=(1, 5))
3840
top1.update(prec1.item(), input.size(0))
3941
top5.update(prec5.item(), input.size(0))
40-
batch_time.update(time.time() - end)
41-
end = time.time()
4242

4343
if i == 0: # for sotabench.com caching of evaluation
44+
memory_allocated = torch.cuda.memory_allocated(device=device)
4445
run_hash = calculate_run_hash([prec1, prec5], output)
4546
# if we are in check model we don't need to go beyond the first
4647
# batch
@@ -59,8 +60,16 @@ def evaluate_classification(
5960
)
6061
return cached_res, run_hash
6162

63+
end = time.time()
64+
65+
speed_mem_metrics = {
66+
'Tasks Per Second': test_loader.batch_size/inference_time.avg,
67+
'Memory Allocated': memory_allocated
68+
}
69+
6270
return (
63-
{"Top 1 Accuracy": top1.avg / 100, "Top 5 Accuracy": top5.avg / 100},
71+
{"Top 1 Accuracy": top1.avg / 100,
72+
"Top 5 Accuracy": top5.avg / 100}, speed_mem_metrics,
6473
run_hash,
6574
)
6675

torchbench/object_detection/coco.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ def benchmark(
217217
collate_fn=collate_fn,
218218
)
219219
test_loader.no_classes = 91 # Number of classes for COCO Detection
220-
test_results, run_hash = evaluate_detection_coco(
220+
test_results, speed_mem_metrics, run_hash = evaluate_detection_coco(
221221
model=model,
222222
test_loader=test_loader,
223223
model_output_transform=model_output_transform,
@@ -232,6 +232,7 @@ def benchmark(
232232
config=config,
233233
dataset='COCO minival',
234234
results=test_results,
235+
speed_mem_metrics=speed_mem_metrics,
235236
pytorch_hub_id=pytorch_hub_url,
236237
model=paper_model_name,
237238
model_description=model_description,

torchbench/object_detection/utils.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
import torchvision
77
from sotabenchapi.check import in_check_mode
88
from sotabenchapi.client import Client
9+
import time
910

1011
from pycocotools import mask as coco_mask
1112
from pycocotools.coco import COCO
1213

13-
from torchbench.utils import calculate_run_hash
14+
from torchbench.utils import calculate_run_hash, AverageMeter
1415
from torchbench.datasets import CocoDetection
1516

1617
from .coco_eval import CocoEvaluator
@@ -198,20 +199,28 @@ def evaluate_detection_coco(
198199
iou_types = ['bbox']
199200
coco_evaluator = CocoEvaluator(coco, iou_types)
200201

202+
inference_time = AverageMeter()
203+
201204
iterator = tqdm.tqdm(test_loader, desc="Evaluation", mininterval=5)
202205

206+
end = time.time()
207+
203208
with torch.no_grad():
204209
for i, (input, target) in enumerate(iterator):
205210
input, target = send_data_to_device(input, target, device=device)
206211
original_output = model(input)
212+
213+
inference_time.update(time.time() - end)
214+
207215
output, target = model_output_transform(original_output, target)
216+
208217
result = {
209218
tar["image_id"].item(): out for tar, out in zip(target, output)
210219
}
211220
coco_evaluator.update(result)
212221

213-
214222
if i == 0: # for sotabench.com caching of evaluation
223+
memory_allocated = torch.cuda.memory_allocated(device=device)
215224
run_hash = calculate_run_hash([], original_output)
216225
# if we are in check model we don't need to go beyond the first
217226
# batch
@@ -230,11 +239,17 @@ def evaluate_detection_coco(
230239
)
231240
return cached_res, run_hash
232241

242+
end = time.time()
243+
233244
coco_evaluator.synchronize_between_processes()
234245
coco_evaluator.accumulate()
235246
coco_evaluator.summarize()
236247

237-
return (get_coco_metrics(coco_evaluator), run_hash)
248+
device_metrics = {
249+
'Tasks Per Second': test_loader.batch_size/inference_time.avg,
250+
'Memory Allocated': memory_allocated}
251+
252+
return (get_coco_metrics(coco_evaluator), device_metrics, run_hash)
238253

239254

240255
def evaluate_detection_voc(

torchbench/semantic_segmentation/ade20k.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def benchmark(
8585
collate_fn=collate_fn,
8686
)
8787
test_loader.no_classes = 150 # Number of classes for ADE20K
88-
test_results, run_hash = evaluate_segmentation(
88+
test_results, speed_mem_metrics, run_hash = evaluate_segmentation(
8989
model=model,
9090
test_loader=test_loader,
9191
model_output_transform=model_output_transform,
@@ -100,6 +100,7 @@ def benchmark(
100100
config=config,
101101
dataset=cls.dataset.__name__ + " val",
102102
results=test_results,
103+
speed_mem_metrics=speed_mem_metrics,
103104
pytorch_hub_id=pytorch_hub_url,
104105
model=paper_model_name,
105106
model_description=model_description,

0 commit comments

Comments (0)