From aa16136e3f6501bc41b303f6a3073a40c373c927 Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll
Date: Fri, 8 Dec 2023 12:07:47 -0800
Subject: [PATCH 1/3] Add the ability to average benchmark runs

---
 .../benchmark/scripts/run_benchmarks.dart     | 82 +++++++++++++++----
 .../devtools_app/benchmark/scripts/utils.dart | 46 +++++++++++
 2 files changed, 113 insertions(+), 15 deletions(-)

diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
index eb052867a9f..1883dd5ac9f 100644
--- a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
@@ -15,24 +15,39 @@ import 'utils.dart';
 
 /// Runs the DevTools web benchmarks and reports the benchmark data.
 ///
-/// Arguments:
-/// * --browser - runs the benchmark tests in the browser (non-headless mode)
-/// * --wasm - runs the benchmark tests with the dart2wasm compiler
-///
-/// See [BenchmarkArgs].
+/// To see available arguments, run this script with the `-h` flag.
 Future<void> main(List<String> args) async {
+  if (args.isNotEmpty && args.first == '-h') {
+    stdout.writeln(BenchmarkArgs._buildArgParser().usage);
+    return;
+  }
+
   final benchmarkArgs = BenchmarkArgs(args);
+  final benchmarkResults = <BenchmarkResults>[];
+  for (var i = 0; i < benchmarkArgs.averageOf; i++) {
+    stdout.writeln('Starting web benchmark tests (run #$i) ...');
+    benchmarkResults.add(
+      await serveWebBenchmark(
+        benchmarkAppDirectory: projectRootDirectory(),
+        entryPoint: 'benchmark/test_infra/client.dart',
+        compilationOptions: CompilationOptions(useWasm: benchmarkArgs.useWasm),
+        treeShakeIcons: false,
+        initialPage: benchmarkInitialPage,
+        headless: !benchmarkArgs.useBrowser,
+      ),
+    );
+    stdout.writeln('Web benchmark tests finished (run #$i).');
+  }
 
-  stdout.writeln('Starting web benchmark tests...');
-  final taskResult = await serveWebBenchmark(
-    benchmarkAppDirectory: projectRootDirectory(),
-    entryPoint: 'benchmark/test_infra/client.dart',
-    compilationOptions: CompilationOptions(useWasm: benchmarkArgs.useWasm),
-    treeShakeIcons: false,
-    initialPage: benchmarkInitialPage,
-    headless: !benchmarkArgs.useBrowser,
-  );
-  stdout.writeln('Web benchmark tests finished.');
+  late final BenchmarkResults taskResult;
+  if (benchmarkArgs.averageOf == 1) {
+    taskResult = benchmarkResults.first;
+  } else {
+    stdout.writeln(
+      'Taking the average of ${benchmarkResults.length} benchmark runs.',
+    );
+    taskResult = averageBenchmarkResults(benchmarkResults);
+  }
 
   final resultsAsMap = taskResult.toJson();
   final resultsAsJsonString =
@@ -84,6 +99,8 @@ class BenchmarkArgs {
 
   bool get useWasm => argResults[_wasmFlag];
 
+  int get averageOf => int.parse(argResults[_averageOfOption]);
+
   String? get saveToFileLocation => argResults[_saveToFileOption];
 
   String? get baselineLocation => argResults[_baselineOption];
@@ -96,15 +113,19 @@ class BenchmarkArgs {
 
   static const _baselineOption = 'baseline';
 
+  static const _averageOfOption = 'average-of';
+
   /// Builds an arg parser for DevTools benchmarks.
   static ArgParser _buildArgParser() {
     return ArgParser()
       ..addFlag(
         _browserFlag,
+        negatable: false,
         help: 'Runs the benchmark tests in browser mode (not headless mode).',
       )
       ..addFlag(
         _wasmFlag,
+        negatable: false,
         help: 'Runs the benchmark tests with dart2wasm',
       )
       ..addOption(
@@ -118,6 +139,37 @@ class BenchmarkArgs {
             'baseline file should be created by running this script with the '
            '$_saveToFileOption in a separate test run.',
        valueHelp: '/Users/me/Downloads/baseline.json',
+      )
+      ..addOption(
+        _averageOfOption,
+        defaultsTo: '1',
+        help: 'The number of times to run the benchmark. The returned results '
+            'will be the average of all the benchmark runs when this value is '
+            'greater than 1.',
+        valueHelp: '5',
       );
   }
 }
+
+BenchmarkResults averageBenchmarkResults(List<BenchmarkResults> results) {
+  if (results.isEmpty) {
+    throw Exception('Cannot take average of empty list.');
+  }
+
+  var totalSum = results.first;
+  for (int i = 1; i < results.length; i++) {
+    final current = results[i];
+    totalSum = totalSum.sumWith(current);
+  }
+
+  final average = totalSum.toJson();
+  for (final benchmark in totalSum.scores.keys) {
+    final scoresForBenchmark = totalSum.scores[benchmark]!;
+    for (int i = 0; i < scoresForBenchmark.length; i++) {
+      final score = scoresForBenchmark[i];
+      final averageScore = score.value / results.length;
+      average[benchmark]![i][score.metric] = averageScore;
+    }
+  }
+  return BenchmarkResults.parse(average);
+}
diff --git a/packages/devtools_app/benchmark/scripts/utils.dart b/packages/devtools_app/benchmark/scripts/utils.dart
index a4cbc99bead..cf4b730adb6 100644
--- a/packages/devtools_app/benchmark/scripts/utils.dart
+++ b/packages/devtools_app/benchmark/scripts/utils.dart
@@ -4,6 +4,9 @@
 
 import 'dart:io';
 
+import 'package:collection/collection.dart';
+import 'package:web_benchmarks/server.dart';
+
 File? checkFileExists(String path) {
   final testFile = File.fromUri(Uri.parse(path));
   if (!testFile.existsSync()) {
@@ -12,3 +15,46 @@ File? checkFileExists(String path) {
   }
   return testFile;
 }
+
+extension BenchmarkResultsExtension on BenchmarkResults {
+  BenchmarkResults sumWith(
+    BenchmarkResults other, {
+    bool throwExceptionOnMismatch = true,
+  }) {
+    final sum = toJson();
+    for (final benchmark in scores.keys) {
+      // Look up this benchmark in [other].
+      final matchingBenchmark = other.scores[benchmark];
+      if (matchingBenchmark == null) {
+        if (throwExceptionOnMismatch) {
+          throw Exception(
+            'Cannot sum benchmarks because [other] is missing an entry for '
+            'benchmark "$benchmark".',
+          );
+        }
+        continue;
+      }
+
+      final scoresForBenchmark = scores[benchmark]!;
+      for (int i = 0; i < scoresForBenchmark.length; i++) {
+        final score = scoresForBenchmark[i];
+        // Look up this score in the [matchingBenchmark] from [other].
+        final matchingScore =
+            matchingBenchmark.firstWhereOrNull((s) => s.metric == score.metric);
+        if (matchingScore == null) {
+          if (throwExceptionOnMismatch) {
+            throw Exception(
+              'Cannot sum benchmarks because benchmark "$benchmark" is missing '
+              'a score for metric ${score.metric}.',
+            );
+          }
+          continue;
+        }
+
+        final sumScore = score.value + matchingScore.value;
+        sum[benchmark]![i][score.metric] = sumScore;
+      }
+    }
+    return BenchmarkResults.parse(sum);
+  }
+}

From d90c6ba6268aa00973c31412fda6e783c2cef674 Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll
Date: Fri, 8 Dec 2023 12:36:19 -0800
Subject: [PATCH 2/3] fix logic bugs

---
 .../benchmark/scripts/run_benchmarks.dart          | 11 +++++++++--
 packages/devtools_app/benchmark/scripts/utils.dart |  6 +++++-
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
index 1883dd5ac9f..7799ce16c1a 100644
--- a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
@@ -151,6 +151,13 @@ class BenchmarkArgs {
   }
 }
 
+// TODO(kenz): upstream the logic to average benchmarks into the
+// package:web_benchmarks
+
+/// Returns the average of the benchmark results in [results].
+///
+/// Each element in [results] is expected to have identical benchmark names and
+/// metrics; otherwise, an [Exception] will be thrown.
 BenchmarkResults averageBenchmarkResults(List<BenchmarkResults> results) {
   if (results.isEmpty) {
     throw Exception('Cannot take average of empty list.');
@@ -167,8 +174,8 @@ BenchmarkResults averageBenchmarkResults(List<BenchmarkResults> results) {
     final scoresForBenchmark = totalSum.scores[benchmark]!;
     for (int i = 0; i < scoresForBenchmark.length; i++) {
       final score = scoresForBenchmark[i];
-      final averageScore = score.value / results.length;
-      average[benchmark]![i][score.metric] = averageScore;
+      final averageValue = score.value / results.length;
+      average[benchmark]![i]['value'] = averageValue;
     }
   }
   return BenchmarkResults.parse(average);
diff --git a/packages/devtools_app/benchmark/scripts/utils.dart b/packages/devtools_app/benchmark/scripts/utils.dart
index cf4b730adb6..2a61898cd21 100644
--- a/packages/devtools_app/benchmark/scripts/utils.dart
+++ b/packages/devtools_app/benchmark/scripts/utils.dart
@@ -17,6 +17,10 @@ File? checkFileExists(String path) {
 }
 
 extension BenchmarkResultsExtension on BenchmarkResults {
+  /// Sums this [BenchmarkResults] instance with [other] by adding the values
+  /// of each matching benchmark score.
+  ///
+  /// Returns a [BenchmarkResults] object with the summed values.
   BenchmarkResults sumWith(
     BenchmarkResults other, {
     bool throwExceptionOnMismatch = true,

From 9b2b0d65fb7d5d12bcd43874405a31a2286238fd Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll
Date: Fri, 8 Dec 2023 13:05:16 -0800
Subject: [PATCH 3/3] formatting

---
 .../benchmark/devtools_benchmarks_test.dart   | 24 ++++++++-----------
 .../devtools_app/benchmark/scripts/utils.dart |  2 +-
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
index 14cdcf5d6eb..ff0cf3f4d01 100644
--- a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
+++ b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
@@ -38,20 +38,16 @@ void main() {
     timeout: const Timeout(Duration(minutes: 10)),
   );
 
-  test(
-    'Can compare web benchmarks',
-    () {
-      final benchmark1 = BenchmarkResults.parse(testBenchmarkResults1);
-      final benchmark2 = BenchmarkResults.parse(testBenchmarkResults2);
-      final comparison = compareBenchmarks(
-        benchmark1,
-        benchmark2,
-        baselineSource: 'path/to/baseline',
-      );
-      expect(comparison, testBenchmarkComparison);
-    },
-    timeout: const Timeout(Duration(minutes: 10)),
-  );
+  test('Can compare web benchmarks', () {
+    final benchmark1 = BenchmarkResults.parse(testBenchmarkResults1);
+    final benchmark2 = BenchmarkResults.parse(testBenchmarkResults2);
+    final comparison = compareBenchmarks(
+      benchmark1,
+      benchmark2,
+      baselineSource: 'path/to/baseline',
+    );
+    expect(comparison, testBenchmarkComparison);
+  });
 
   // TODO(kenz): add tests that verify performance meets some expected threshold
 }
diff --git a/packages/devtools_app/benchmark/scripts/utils.dart b/packages/devtools_app/benchmark/scripts/utils.dart
index 2a61898cd21..01df4c2143c 100644
--- a/packages/devtools_app/benchmark/scripts/utils.dart
+++ b/packages/devtools_app/benchmark/scripts/utils.dart
@@ -19,7 +19,7 @@ File? checkFileExists(String path) {
 extension BenchmarkResultsExtension on BenchmarkResults {
   /// Sums this [BenchmarkResults] instance with [other] by adding the values
   /// of each matching benchmark score.
-  /// 
+  ///
   /// Returns a [BenchmarkResults] object with the summed values.
   BenchmarkResults sumWith(
     BenchmarkResults other, {
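
Note (illustration only, not part of the patches above): the averaging these patches add boils down to summing each metric's value across N runs and then dividing by N. The standalone Dart sketch below shows that idea on plain maps shaped roughly like the toJson() output the scripts pass around. The RunResults typedef, the sumRuns and averageRuns helpers, the file name, the 'metric' and 'value' keys, and the sample data are all assumptions made for this sketch; the real code operates on BenchmarkResults and its scores from package:web_benchmarks, and matches scores by metric name via firstWhereOrNull rather than by list position.

// averaging_sketch.dart (hypothetical file name; illustration only).
//
// Each run is modeled as a map from benchmark name to a list of score maps.
// The 'metric' and 'value' keys are assumptions for this sketch, not the
// package:web_benchmarks API.

typedef RunResults = Map<String, List<Map<String, Object?>>>;

/// Sums two runs by adding the 'value' of each score, matched by position.
/// (The patches above match scores by metric name instead; matching by
/// position keeps this sketch short.)
RunResults sumRuns(RunResults a, RunResults b) {
  return {
    for (final entry in a.entries)
      entry.key: [
        for (var i = 0; i < entry.value.length; i++)
          {
            'metric': entry.value[i]['metric'],
            'value': (entry.value[i]['value']! as num) +
                (b[entry.key]![i]['value']! as num),
          },
      ],
  };
}

/// Averages [runs]: sum them pairwise, then divide every value by the
/// number of runs.
RunResults averageRuns(List<RunResults> runs) {
  if (runs.isEmpty) {
    throw StateError('Cannot average zero runs.');
  }
  var total = runs.first;
  for (final run in runs.skip(1)) {
    total = sumRuns(total, run);
  }
  return {
    for (final entry in total.entries)
      entry.key: [
        for (final score in entry.value)
          {
            'metric': score['metric'],
            'value': (score['value']! as num) / runs.length,
          },
      ],
  };
}

void main() {
  final run1 = {
    'example_benchmark': [
      {'metric': 'example.metric', 'value': 800.0},
    ],
  };
  final run2 = {
    'example_benchmark': [
      {'metric': 'example.metric', 'value': 1000.0},
    ],
  };
  // Prints {example_benchmark: [{metric: example.metric, value: 900.0}]}.
  print(averageRuns([run1, run2]));
}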