Add the ability to average benchmark runs (#6920)

kenzieschmoll · web-flow · commit 11d4abe4edc1 · 2023-12-08T13:29:26.000-08:00
diff --git a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
@@ -38,20 +38,16 @@ void main() {
     timeout: const Timeout(Duration(minutes: 10)),
   );
 
-  test(
-    'Can compare web benchmarks',
-    () {
-      final benchmark1 = BenchmarkResults.parse(testBenchmarkResults1);
-      final benchmark2 = BenchmarkResults.parse(testBenchmarkResults2);
-      final comparison = compareBenchmarks(
-        benchmark1,
-        benchmark2,
-        baselineSource: 'path/to/baseline',
-      );
-      expect(comparison, testBenchmarkComparison);
-    },
-    timeout: const Timeout(Duration(minutes: 10)),
-  );
+  test('Can compare web benchmarks', () {
+    final benchmark1 = BenchmarkResults.parse(testBenchmarkResults1);
+    final benchmark2 = BenchmarkResults.parse(testBenchmarkResults2);
+    final comparison = compareBenchmarks(
+      benchmark1,
+      benchmark2,
+      baselineSource: 'path/to/baseline',
+    );
+    expect(comparison, testBenchmarkComparison);
+  });
 
   // TODO(kenz): add tests that verify performance meets some expected threshold
 }
diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
@@ -15,24 +15,39 @@ import 'utils.dart';
 
 /// Runs the DevTools web benchmarks and reports the benchmark data.
 ///
-/// Arguments:
-/// * --browser - runs the benchmark tests in the browser (non-headless mode)
-/// * --wasm - runs the benchmark tests with the dart2wasm compiler
-///
-/// See [BenchmarkArgs].
+/// To see available arguments, run this script with the `-h` flag.
 Future<void> main(List<String> args) async {
+  if (args.isNotEmpty && args.first == '-h') {
+    stdout.writeln(BenchmarkArgs._buildArgParser().usage);
+    return;
+  }
+
   final benchmarkArgs = BenchmarkArgs(args);
+  final benchmarkResults = <BenchmarkResults>[];
+  for (var i = 0; i < benchmarkArgs.averageOf; i++) {
+    stdout.writeln('Starting web benchmark tests (run #$i) ...');
+    benchmarkResults.add(
+      await serveWebBenchmark(
+        benchmarkAppDirectory: projectRootDirectory(),
+        entryPoint: 'benchmark/test_infra/client.dart',
+        compilationOptions: CompilationOptions(useWasm: benchmarkArgs.useWasm),
+        treeShakeIcons: false,
+        initialPage: benchmarkInitialPage,
+        headless: !benchmarkArgs.useBrowser,
+      ),
+    );
+    stdout.writeln('Web benchmark tests finished (run #$i).');
+  }
 
-  stdout.writeln('Starting web benchmark tests...');
-  final taskResult = await serveWebBenchmark(
-    benchmarkAppDirectory: projectRootDirectory(),
-    entryPoint: 'benchmark/test_infra/client.dart',
-    compilationOptions: CompilationOptions(useWasm: benchmarkArgs.useWasm),
-    treeShakeIcons: false,
-    initialPage: benchmarkInitialPage,
-    headless: !benchmarkArgs.useBrowser,
-  );
-  stdout.writeln('Web benchmark tests finished.');
+  late final BenchmarkResults taskResult;
+  if (benchmarkArgs.averageOf == 1) {
+    taskResult = benchmarkResults.first;
+  } else {
+    stdout.writeln(
+      'Taking the average of ${benchmarkResults.length} benchmark runs.',
+    );
+    taskResult = averageBenchmarkResults(benchmarkResults);
+  }
 
   final resultsAsMap = taskResult.toJson();
   final resultsAsJsonString =
@@ -84,6 +99,8 @@ class BenchmarkArgs {
 
   bool get useWasm => argResults[_wasmFlag];
 
+  int get averageOf => int.parse(argResults[_averageOfOption]);
+
   String? get saveToFileLocation => argResults[_saveToFileOption];
 
   String? get baselineLocation => argResults[_baselineOption];
@@ -96,15 +113,19 @@ class BenchmarkArgs {
 
   static const _baselineOption = 'baseline';
 
+  static const _averageOfOption = 'average-of';
+
   /// Builds an arg parser for DevTools benchmarks.
   static ArgParser _buildArgParser() {
     return ArgParser()
       ..addFlag(
         _browserFlag,
+        negatable: false,
         help: 'Runs the benchmark tests in browser mode (not headless mode).',
       )
       ..addFlag(
         _wasmFlag,
+        negatable: false,
         help: 'Runs the benchmark tests with dart2wasm',
       )
       ..addOption(
@@ -118,6 +139,44 @@ class BenchmarkArgs {
             'baseline file should be created by running this script with the '
             '$_saveToFileOption in a separate test run.',
         valueHelp: '/Users/me/Downloads/baseline.json',
+      )
+      ..addOption(
+        _averageOfOption,
+        defaultsTo: '1',
+        help: 'The number of times to run the benchmark. The returned results '
+            'will be the average of all the benchmark runs when this value is '
+            'greater than 1.',
+        valueHelp: '5',
       );
   }
 }
+
+// TODO(kenz): upstream the benchmark-averaging logic into
+// package:web_benchmarks.
+
+/// Computes the per-metric mean of the benchmark runs in [results].
+///
+/// Throws an [Exception] if [results] is empty, or if a later run lacks a
+/// benchmark or metric that the first run has (see `sumWith`).
+BenchmarkResults averageBenchmarkResults(List<BenchmarkResults> results) {
+  if (results.isEmpty) {
+    throw Exception('Cannot take average of empty list.');
+  }
+
+  // Accumulate per-metric totals, seeded with the first run.
+  var totals = results.first;
+  for (final run in results.skip(1)) {
+    totals = totals.sumWith(run);
+  }
+
+  // Overwrite each summed value in the JSON form with its mean.
+  final averagedJson = totals.toJson();
+  for (final entry in totals.scores.entries) {
+    final summedScores = entry.value;
+    for (var index = 0; index < summedScores.length; index++) {
+      final mean = summedScores[index].value / results.length;
+      averagedJson[entry.key]![index]['value'] = mean;
+    }
+  }
+  return BenchmarkResults.parse(averagedJson);
+}
diff --git a/packages/devtools_app/benchmark/scripts/utils.dart b/packages/devtools_app/benchmark/scripts/utils.dart
@@ -4,6 +4,9 @@
 
 import 'dart:io';
 
+import 'package:collection/collection.dart';
+import 'package:web_benchmarks/server.dart';
+
 File? checkFileExists(String path) {
   final testFile = File.fromUri(Uri.parse(path));
   if (!testFile.existsSync()) {
@@ -12,3 +15,50 @@ File? checkFileExists(String path) {
   }
   return testFile;
 }
+
+extension BenchmarkResultsExtension on BenchmarkResults {
+  /// Adds the scores of [other] to the matching scores of this instance.
+  ///
+  /// Scores are paired by benchmark name and then by metric; entries that
+  /// only [other] contains are never visited. When [other] lacks a benchmark
+  /// or metric of this instance, an [Exception] is thrown if
+  /// [throwExceptionOnMismatch] is true; otherwise that entry is skipped and
+  /// keeps the value `toJson()` produced for it.
+  ///
+  /// Returns a new [BenchmarkResults] parsed from the summed JSON.
+  BenchmarkResults sumWith(
+    BenchmarkResults other, {
+    bool throwExceptionOnMismatch = true,
+  }) {
+    final summedJson = toJson();
+    for (final entry in scores.entries) {
+      final benchmark = entry.key;
+      final otherScores = other.scores[benchmark];
+      if (otherScores == null) {
+        if (!throwExceptionOnMismatch) continue;
+        throw Exception(
+          'Cannot sum benchmarks because [other] is missing an entry for '
+          'benchmark "$benchmark".',
+        );
+      }
+
+      final ownScores = entry.value;
+      for (var index = 0; index < ownScores.length; index++) {
+        final score = ownScores[index];
+        // Pair scores on metric name rather than on list position.
+        final match =
+            otherScores.firstWhereOrNull((s) => s.metric == score.metric);
+        if (match == null) {
+          if (!throwExceptionOnMismatch) continue;
+          throw Exception(
+            'Cannot sum benchmarks because benchmark "$benchmark" is missing '
+            'a score for metric ${score.metric}.',
+          );
+        }
+
+        summedJson[benchmark]![index]['value'] = score.value + match.value;
+      }
+    }
+    return BenchmarkResults.parse(summedJson);
+  }
+}