From aa16136e3f6501bc41b303f6a3073a40c373c927 Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll
Date: Fri, 8 Dec 2023 12:07:47 -0800
Subject: [PATCH 1/3] Add the ability to average benchmark runs

---
 .../benchmark/scripts/run_benchmarks.dart     | 82 +++++++++++++++----
 .../devtools_app/benchmark/scripts/utils.dart | 46 +++++++++++
 2 files changed, 113 insertions(+), 15 deletions(-)

diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
index eb052867a9f..1883dd5ac9f 100644
--- a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
@@ -15,24 +15,39 @@ import 'utils.dart';
 
 /// Runs the DevTools web benchmarks and reports the benchmark data.
 ///
-/// Arguments:
-/// * --browser - runs the benchmark tests in the browser (non-headless mode)
-/// * --wasm - runs the benchmark tests with the dart2wasm compiler
-///
-/// See [BenchmarkArgs].
+/// To see available arguments, run this script with the `-h` flag.
 Future<void> main(List<String> args) async {
+  if (args.isNotEmpty && args.first == '-h') {
+    stdout.writeln(BenchmarkArgs._buildArgParser().usage);
+    return;
+  }
+
   final benchmarkArgs = BenchmarkArgs(args);
+  final benchmarkResults = <BenchmarkResults>[];
+  for (var i = 0; i < benchmarkArgs.averageOf; i++) {
+    stdout.writeln('Starting web benchmark tests (run #$i) ...');
+    benchmarkResults.add(
+      await serveWebBenchmark(
+        benchmarkAppDirectory: projectRootDirectory(),
+        entryPoint: 'benchmark/test_infra/client.dart',
+        compilationOptions: CompilationOptions(useWasm: benchmarkArgs.useWasm),
+        treeShakeIcons: false,
+        initialPage: benchmarkInitialPage,
+        headless: !benchmarkArgs.useBrowser,
+      ),
+    );
+    stdout.writeln('Web benchmark tests finished (run #$i).');
+  }
 
-  stdout.writeln('Starting web benchmark tests...');
-  final taskResult = await serveWebBenchmark(
-    benchmarkAppDirectory: projectRootDirectory(),
-    entryPoint: 'benchmark/test_infra/client.dart',
-    compilationOptions: CompilationOptions(useWasm: benchmarkArgs.useWasm),
-    treeShakeIcons: false,
-    initialPage: benchmarkInitialPage,
-    headless: !benchmarkArgs.useBrowser,
-  );
-  stdout.writeln('Web benchmark tests finished.');
+  late final BenchmarkResults taskResult;
+  if (benchmarkArgs.averageOf == 1) {
+    taskResult = benchmarkResults.first;
+  } else {
+    stdout.writeln(
+      'Taking the average of ${benchmarkResults.length} benchmark runs.',
+    );
+    taskResult = averageBenchmarkResults(benchmarkResults);
+  }
 
   final resultsAsMap = taskResult.toJson();
   final resultsAsJsonString =
@@ -84,6 +99,8 @@ class BenchmarkArgs {
 
   bool get useWasm => argResults[_wasmFlag];
 
+  int get averageOf => int.parse(argResults[_averageOfOption]);
+
   String? get saveToFileLocation => argResults[_saveToFileOption];
 
   String? get baselineLocation => argResults[_baselineOption];
@@ -96,15 +113,19 @@ class BenchmarkArgs {
 
   static const _baselineOption = 'baseline';
 
+  static const _averageOfOption = 'average-of';
+
   /// Builds an arg parser for DevTools benchmarks.
   static ArgParser _buildArgParser() {
     return ArgParser()
       ..addFlag(
         _browserFlag,
+        negatable: false,
         help: 'Runs the benchmark tests in browser mode (not headless mode).',
       )
       ..addFlag(
         _wasmFlag,
+        negatable: false,
         help: 'Runs the benchmark tests with dart2wasm',
       )
       ..addOption(
@@ -118,6 +139,37 @@ class BenchmarkArgs {
             'baseline file should be created by running this script with the '
            '$_saveToFileOption in a separate test run.',
        valueHelp: '/Users/me/Downloads/baseline.json',
+      )
+      ..addOption(
+        _averageOfOption,
+        defaultsTo: '1',
+        help: 'The number of times to run the benchmark. The returned results '
+            'will be the average of all the benchmark runs when this value is '
+            'greater than 1.',
+        valueHelp: '5',
       );
   }
 }
+
+BenchmarkResults averageBenchmarkResults(List<BenchmarkResults> results) {
+  if (results.isEmpty) {
+    throw Exception('Cannot take average of empty list.');
+  }
+
+  var totalSum = results.first;
+  for (int i = 1; i < results.length; i++) {
+    final current = results[i];
+    totalSum = totalSum.sumWith(current);
+  }
+
+  final average = totalSum.toJson();
+  for (final benchmark in totalSum.scores.keys) {
+    final scoresForBenchmark = totalSum.scores[benchmark]!;
+    for (int i = 0; i < scoresForBenchmark.length; i++) {
+      final score = scoresForBenchmark[i];
+      final averageScore = score.value / results.length;
+      average[benchmark]![i][score.metric] = averageScore;
+    }
+  }
+  return BenchmarkResults.parse(average);
+}
diff --git a/packages/devtools_app/benchmark/scripts/utils.dart b/packages/devtools_app/benchmark/scripts/utils.dart
index a4cbc99bead..cf4b730adb6 100644
--- a/packages/devtools_app/benchmark/scripts/utils.dart
+++ b/packages/devtools_app/benchmark/scripts/utils.dart
@@ -4,6 +4,9 @@
 
 import 'dart:io';
 
+import 'package:collection/collection.dart';
+import 'package:web_benchmarks/server.dart';
+
 File? checkFileExists(String path) {
   final testFile = File.fromUri(Uri.parse(path));
   if (!testFile.existsSync()) {
@@ -12,3 +15,46 @@ File? checkFileExists(String path) {
   }
   return testFile;
 }
+
+extension BenchmarkResultsExtension on BenchmarkResults {
+  BenchmarkResults sumWith(
+    BenchmarkResults other, {
+    bool throwExceptionOnMismatch = true,
+  }) {
+    final sum = toJson();
+    for (final benchmark in scores.keys) {
+      // Look up this benchmark in [other].
+      final matchingBenchmark = other.scores[benchmark];
+      if (matchingBenchmark == null) {
+        if (throwExceptionOnMismatch) {
+          throw Exception(
+            'Cannot sum benchmarks because [other] is missing an entry for '
+            'benchmark "$benchmark".',
+          );
+        }
+        continue;
+      }
+
+      final scoresForBenchmark = scores[benchmark]!;
+      for (int i = 0; i < scoresForBenchmark.length; i++) {
+        final score = scoresForBenchmark[i];
+        // Look up this score in the [matchingBenchmark] from [other].
+        final matchingScore =
+            matchingBenchmark.firstWhereOrNull((s) => s.metric == score.metric);
+        if (matchingScore == null) {
+          if (throwExceptionOnMismatch) {
+            throw Exception(
+              'Cannot sum benchmarks because benchmark "$benchmark" is missing '
+              'a score for metric ${score.metric}.',
+            );
+          }
+          continue;
+        }
+
+        final sumScore = score.value + matchingScore.value;
+        sum[benchmark]![i][score.metric] = sumScore;
+      }
+    }
+    return BenchmarkResults.parse(sum);
+  }
+}

From d90c6ba6268aa00973c31412fda6e783c2cef674 Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll
Date: Fri, 8 Dec 2023 12:36:19 -0800
Subject: [PATCH 2/3] fix logic bugs

---
 .../benchmark/scripts/run_benchmarks.dart          | 11 +++++++++--
 packages/devtools_app/benchmark/scripts/utils.dart |  6 +++++-
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
index 1883dd5ac9f..7799ce16c1a 100644
--- a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
@@ -151,6 +151,13 @@ class BenchmarkArgs {
   }
 }
 
+// TODO(kenz): upstream the logic to average benchmarks into the
+// package:web_benchmarks
+
+/// Returns the average of the benchmark results in [results].
+///
+/// Each element in [results] is expected to have identical benchmark names and
+/// metrics; otherwise, an [Exception] will be thrown.
 BenchmarkResults averageBenchmarkResults(List<BenchmarkResults> results) {
   if (results.isEmpty) {
     throw Exception('Cannot take average of empty list.');
@@ -167,8 +174,8 @@ BenchmarkResults averageBenchmarkResults(List<BenchmarkResults> results) {
     final scoresForBenchmark = totalSum.scores[benchmark]!;
     for (int i = 0; i < scoresForBenchmark.length; i++) {
       final score = scoresForBenchmark[i];
-      final averageScore = score.value / results.length;
-      average[benchmark]![i][score.metric] = averageScore;
+      final averageValue = score.value / results.length;
+      average[benchmark]![i]['value'] = averageValue;
     }
   }
   return BenchmarkResults.parse(average);
diff --git a/packages/devtools_app/benchmark/scripts/utils.dart b/packages/devtools_app/benchmark/scripts/utils.dart
index cf4b730adb6..2a61898cd21 100644
--- a/packages/devtools_app/benchmark/scripts/utils.dart
+++ b/packages/devtools_app/benchmark/scripts/utils.dart
@@ -17,6 +17,10 @@ File? checkFileExists(String path) {
 }
 
 extension BenchmarkResultsExtension on BenchmarkResults {
+  /// Sums this [BenchmarkResults] instance with [other] by adding the values
+  /// of each matching benchmark score.
+  ///
+  /// Returns a [BenchmarkResults] object with the summed values.
   BenchmarkResults sumWith(
     BenchmarkResults other, {
     bool throwExceptionOnMismatch = true,

From 9b2b0d65fb7d5d12bcd43874405a31a2286238fd Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll
Date: Fri, 8 Dec 2023 13:05:16 -0800
Subject: [PATCH 3/3] formatting

---
 .../benchmark/devtools_benchmarks_test.dart   | 24 ++++++++-----------
 .../devtools_app/benchmark/scripts/utils.dart |  2 +-
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
index 14cdcf5d6eb..ff0cf3f4d01 100644
--- a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
+++ b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
@@ -38,20 +38,16 @@ void main() {
     timeout: const Timeout(Duration(minutes: 10)),
   );
 
-  test(
-    'Can compare web benchmarks',
-    () {
-      final benchmark1 = BenchmarkResults.parse(testBenchmarkResults1);
-      final benchmark2 = BenchmarkResults.parse(testBenchmarkResults2);
-      final comparison = compareBenchmarks(
-        benchmark1,
-        benchmark2,
-        baselineSource: 'path/to/baseline',
-      );
-      expect(comparison, testBenchmarkComparison);
-    },
-    timeout: const Timeout(Duration(minutes: 10)),
-  );
+  test('Can compare web benchmarks', () {
+    final benchmark1 = BenchmarkResults.parse(testBenchmarkResults1);
+    final benchmark2 = BenchmarkResults.parse(testBenchmarkResults2);
+    final comparison = compareBenchmarks(
+      benchmark1,
+      benchmark2,
+      baselineSource: 'path/to/baseline',
+    );
+    expect(comparison, testBenchmarkComparison);
+  });
 
   // TODO(kenz): add tests that verify performance meets some expected threshold
 }
diff --git a/packages/devtools_app/benchmark/scripts/utils.dart b/packages/devtools_app/benchmark/scripts/utils.dart
index 2a61898cd21..01df4c2143c 100644
--- a/packages/devtools_app/benchmark/scripts/utils.dart
+++ b/packages/devtools_app/benchmark/scripts/utils.dart
@@ -19,7 +19,7 @@ File? checkFileExists(String path) {
 extension BenchmarkResultsExtension on BenchmarkResults {
   /// Sums this [BenchmarkResults] instance with [other] by adding the values
   /// of each matching benchmark score.
-  /// 
+  ///
   /// Returns a [BenchmarkResults] object with the summed values.
   BenchmarkResults sumWith(
     BenchmarkResults other, {
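
Note (illustration only, not part of the patches above): the averaging these patches add boils down to summing each metric's value across N runs and then dividing by N. The standalone Dart sketch below shows that idea on plain maps shaped roughly like the toJson() output the scripts pass around. The RunResults typedef, the sumRuns and averageRuns helpers, the file name, the 'metric' and 'value' keys, and the sample data are all assumptions made for this sketch; the real code operates on BenchmarkResults and its scores from package:web_benchmarks, and matches scores by metric name via firstWhereOrNull rather than by list position.

// averaging_sketch.dart (hypothetical file name; illustration only).
//
// Each run is modeled as a map from benchmark name to a list of score maps.
// The 'metric' and 'value' keys are assumptions for this sketch, not the
// package:web_benchmarks API.

typedef RunResults = Map<String, List<Map<String, Object?>>>;

/// Sums two runs by adding the 'value' of each score, matched by position.
/// (The patches above match scores by metric name instead; matching by
/// position keeps this sketch short.)
RunResults sumRuns(RunResults a, RunResults b) {
  return {
    for (final entry in a.entries)
      entry.key: [
        for (var i = 0; i < entry.value.length; i++)
          {
            'metric': entry.value[i]['metric'],
            'value': (entry.value[i]['value']! as num) +
                (b[entry.key]![i]['value']! as num),
          },
      ],
  };
}

/// Averages [runs]: sum them pairwise, then divide every value by the
/// number of runs.
RunResults averageRuns(List<RunResults> runs) {
  if (runs.isEmpty) {
    throw StateError('Cannot average zero runs.');
  }
  var total = runs.first;
  for (final run in runs.skip(1)) {
    total = sumRuns(total, run);
  }
  return {
    for (final entry in total.entries)
      entry.key: [
        for (final score in entry.value)
          {
            'metric': score['metric'],
            'value': (score['value']! as num) / runs.length,
          },
      ],
  };
}

void main() {
  final run1 = {
    'example_benchmark': [
      {'metric': 'example.metric', 'value': 800.0},
    ],
  };
  final run2 = {
    'example_benchmark': [
      {'metric': 'example.metric', 'value': 1000.0},
    ],
  };
  // Prints {example_benchmark: [{metric: example.metric, value: 900.0}]}.
  print(averageRuns([run1, run2]));
}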