diff --git a/packages/devtools_app/benchmark/README.md b/packages/devtools_app/benchmark/README.md index ab06b1776de..195c3e9a569 100644 --- a/packages/devtools_app/benchmark/README.md +++ b/packages/devtools_app/benchmark/README.md @@ -21,7 +21,6 @@ All of the commands below should be run from the `packages/devtools_app` directo To run the performance benchmark tests locally, run: ```sh dart run benchmark/scripts/run_benchmarks.dart -dart run benchmark/run_benchmarks.dart ``` To run the test that verifies we can run benchmark tests, run: @@ -48,4 +47,18 @@ the other running tests are using. The tests are defined by "automators", which live in the `benchmark/test_infra/automators` directory. To add a new test or test case, either modify an existing automator or add -a new one for a new screen. Follow existing examples in that directory for guidance. \ No newline at end of file +a new one for a new screen. Follow existing examples in that directory for guidance. + +## Comparing two benchmark test runs + +In order to compare two different benchmark runs, you first need to run the benchmark +tests and save the results to a file: +```sh +dart run benchmark/scripts/run_benchmarks.dart --save-to-file=baseline.json +dart run benchmark/scripts/run_benchmarks.dart --save-to-file=test.json +``` + +Then, to compare the benchmarks and calculate deltas, run: +```sh +dart run benchmark/scripts/compare_benchmarks.dart baseline.json test.json +``` diff --git a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart index 35d687a7261..14cdcf5d6eb 100644 --- a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart +++ b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart @@ -11,6 +11,7 @@ import 'dart:io'; import 'package:test/test.dart'; import 'package:web_benchmarks/server.dart'; +import 'scripts/compare_benchmarks.dart'; import 'test_infra/common.dart'; import 
'test_infra/project_root_directory.dart'; @@ -37,6 +38,21 @@ void main() { timeout: const Timeout(Duration(minutes: 10)), ); + test( + 'Can compare web benchmarks', + () { + final benchmark1 = BenchmarkResults.parse(testBenchmarkResults1); + final benchmark2 = BenchmarkResults.parse(testBenchmarkResults2); + final comparison = compareBenchmarks( + benchmark1, + benchmark2, + baselineSource: 'path/to/baseline', + ); + expect(comparison, testBenchmarkComparison); + }, + timeout: const Timeout(Duration(minutes: 10)), + ); + // TODO(kenz): add tests that verify performance meets some expected threshold } @@ -86,3 +102,134 @@ Future _runBenchmarks({bool useWasm = false}) async { isA(), ); } + +final testBenchmarkResults1 = { + 'foo': [ + {'metric': 'preroll_frame.average', 'value': 60.5}, + {'metric': 'preroll_frame.outlierAverage', 'value': 1400}, + {'metric': 'preroll_frame.outlierRatio', 'value': 20.2}, + {'metric': 'preroll_frame.noise', 'value': 0.85}, + {'metric': 'apply_frame.average', 'value': 80.0}, + {'metric': 'apply_frame.outlierAverage', 'value': 200.6}, + {'metric': 'apply_frame.outlierRatio', 'value': 2.5}, + {'metric': 'apply_frame.noise', 'value': 0.4}, + {'metric': 'drawFrameDuration.average', 'value': 2058.9}, + {'metric': 'drawFrameDuration.outlierAverage', 'value': 24000}, + {'metric': 'drawFrameDuration.outlierRatio', 'value': 12.05}, + {'metric': 'drawFrameDuration.noise', 'value': 0.34}, + {'metric': 'totalUiFrame.average', 'value': 4166}, + ], + 'bar': [ + {'metric': 'preroll_frame.average', 'value': 60.5}, + {'metric': 'preroll_frame.outlierAverage', 'value': 1400}, + {'metric': 'preroll_frame.outlierRatio', 'value': 20.2}, + {'metric': 'preroll_frame.noise', 'value': 0.85}, + {'metric': 'apply_frame.average', 'value': 80.0}, + {'metric': 'apply_frame.outlierAverage', 'value': 200.6}, + {'metric': 'apply_frame.outlierRatio', 'value': 2.5}, + {'metric': 'apply_frame.noise', 'value': 0.4}, + {'metric': 'drawFrameDuration.average', 'value': 
2058.9}, + {'metric': 'drawFrameDuration.outlierAverage', 'value': 24000}, + {'metric': 'drawFrameDuration.outlierRatio', 'value': 12.05}, + {'metric': 'drawFrameDuration.noise', 'value': 0.34}, + {'metric': 'totalUiFrame.average', 'value': 4166}, + ], +}; + +final testBenchmarkResults2 = { + 'foo': [ + {'metric': 'preroll_frame.average', 'value': 65.5}, + {'metric': 'preroll_frame.outlierAverage', 'value': 1410}, + {'metric': 'preroll_frame.outlierRatio', 'value': 20.0}, + {'metric': 'preroll_frame.noise', 'value': 1.5}, + {'metric': 'apply_frame.average', 'value': 50.0}, + {'metric': 'apply_frame.outlierAverage', 'value': 100.0}, + {'metric': 'apply_frame.outlierRatio', 'value': 2.55}, + {'metric': 'apply_frame.noise', 'value': 0.9}, + {'metric': 'drawFrameDuration.average', 'value': 2000.0}, + {'metric': 'drawFrameDuration.outlierAverage', 'value': 20000}, + {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05}, + {'metric': 'drawFrameDuration.noise', 'value': 1.34}, + {'metric': 'totalUiFrame.average', 'value': 4150}, + ], + 'bar': [ + {'metric': 'preroll_frame.average', 'value': 65.5}, + {'metric': 'preroll_frame.outlierAverage', 'value': 1410}, + {'metric': 'preroll_frame.outlierRatio', 'value': 20.0}, + {'metric': 'preroll_frame.noise', 'value': 1.5}, + {'metric': 'apply_frame.average', 'value': 50.0}, + {'metric': 'apply_frame.outlierAverage', 'value': 100.0}, + {'metric': 'apply_frame.outlierRatio', 'value': 2.55}, + {'metric': 'apply_frame.noise', 'value': 0.9}, + {'metric': 'drawFrameDuration.average', 'value': 2000.0}, + {'metric': 'drawFrameDuration.outlierAverage', 'value': 20000}, + {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05}, + {'metric': 'drawFrameDuration.noise', 'value': 1.34}, + {'metric': 'totalUiFrame.average', 'value': 4150}, + ], +}; + +final testBenchmarkComparison = { + 'foo': [ + {'metric': 'preroll_frame.average', 'value': 65.5, 'delta': 5.0}, + {'metric': 'preroll_frame.outlierAverage', 'value': 1410.0, 'delta': 
10.0}, + { + 'metric': 'preroll_frame.outlierRatio', + 'value': 20.0, + 'delta': -0.1999999999999993, + }, + {'metric': 'preroll_frame.noise', 'value': 1.5, 'delta': 0.65}, + {'metric': 'apply_frame.average', 'value': 50.0, 'delta': -30.0}, + {'metric': 'apply_frame.outlierAverage', 'value': 100.0, 'delta': -100.6}, + { + 'metric': 'apply_frame.outlierRatio', + 'value': 2.55, + 'delta': 0.04999999999999982, + }, + {'metric': 'apply_frame.noise', 'value': 0.9, 'delta': 0.5}, + { + 'metric': 'drawFrameDuration.average', + 'value': 2000.0, + 'delta': -58.90000000000009, + }, + { + 'metric': 'drawFrameDuration.outlierAverage', + 'value': 20000.0, + 'delta': -4000.0, + }, + {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05, 'delta': -1.0}, + {'metric': 'drawFrameDuration.noise', 'value': 1.34, 'delta': 1.0}, + {'metric': 'totalUiFrame.average', 'value': 4150.0, 'delta': -16.0}, + ], + 'bar': [ + {'metric': 'preroll_frame.average', 'value': 65.5, 'delta': 5.0}, + {'metric': 'preroll_frame.outlierAverage', 'value': 1410.0, 'delta': 10.0}, + { + 'metric': 'preroll_frame.outlierRatio', + 'value': 20.0, + 'delta': -0.1999999999999993, + }, + {'metric': 'preroll_frame.noise', 'value': 1.5, 'delta': 0.65}, + {'metric': 'apply_frame.average', 'value': 50.0, 'delta': -30.0}, + {'metric': 'apply_frame.outlierAverage', 'value': 100.0, 'delta': -100.6}, + { + 'metric': 'apply_frame.outlierRatio', + 'value': 2.55, + 'delta': 0.04999999999999982, + }, + {'metric': 'apply_frame.noise', 'value': 0.9, 'delta': 0.5}, + { + 'metric': 'drawFrameDuration.average', + 'value': 2000.0, + 'delta': -58.90000000000009, + }, + { + 'metric': 'drawFrameDuration.outlierAverage', + 'value': 20000.0, + 'delta': -4000.0, + }, + {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05, 'delta': -1.0}, + {'metric': 'drawFrameDuration.noise', 'value': 1.34, 'delta': 1.0}, + {'metric': 'totalUiFrame.average', 'value': 4150.0, 'delta': -16.0}, + ], +}; diff --git 
a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart new file mode 100644 index 00000000000..234ef9a0d87 --- /dev/null +++ b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart @@ -0,0 +1,125 @@ +// Copyright 2023 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +import 'dart:convert'; +import 'dart:io'; + +import 'package:collection/collection.dart'; +import 'package:web_benchmarks/server.dart'; + +import 'utils.dart'; + +/// Compares two sets of web benchmarks and calculates the delta between each +/// matching metric. +void main(List args) { + if (args.length != 2) { + throw Exception( + 'Expected 2 arguments (, ), but instead there ' + 'were ${args.length}.', + ); + } + + final baselineSource = args[0]; + final testSource = args[1]; + + stdout + ..writeln('Comparing the following benchmark results:') + ..writeln(' "$testSource" (test)') + ..writeln(' "$baselineSource" (baseline)'); + + final baselineFile = checkFileExists(baselineSource); + final testFile = checkFileExists(testSource); + if (baselineFile == null || testFile == null) { + if (baselineFile == null) { + throw Exception('Cannot find baseline file $baselineSource'); + } + if (testFile == null) { + throw Exception('Cannot find test file $testSource'); + } + } + + final baselineResults = + BenchmarkResults.parse(jsonDecode(baselineFile.readAsStringSync())); + final testResults = + BenchmarkResults.parse(jsonDecode(testFile.readAsStringSync())); + compareBenchmarks( + baselineResults, + testResults, + baselineSource: baselineSource, + ); +} + +Map>> compareBenchmarks( + BenchmarkResults baseline, + BenchmarkResults test, { + required String baselineSource, +}) { + stdout.writeln('Starting baseline comparison...'); + + for (final benchmarkName in test.scores.keys) { + stdout.writeln('Comparing metrics for benchmark 
"$benchmarkName".'); + + // Lookup this benchmark in the baseline. + final baselineScores = baseline.scores[benchmarkName]; + if (baselineScores == null) { + stdout.writeln( + 'Baseline does not contain results for benchmark "$benchmarkName".', + ); + continue; + } + + final testScores = test.scores[benchmarkName]!; + + for (final score in testScores) { + // Lookup this metric in the baseline. + final baselineScore = + baselineScores.firstWhereOrNull((s) => s.metric == score.metric); + if (baselineScore == null) { + stdout.writeln( + 'Baseline does not contain metric "${score.metric}" for ' + 'benchmark "$benchmarkName".', + ); + continue; + } + + // Add the delta to the [testMetric]. + _benchmarkDeltas[score] = (score.value - baselineScore.value).toDouble(); + } + } + stdout.writeln('Baseline comparison finished.'); + + final comparisonAsMap = test.toJsonWithDeltas(); + stdout + ..writeln('==== Comparison with baseline $baselineSource ====') + ..writeln(const JsonEncoder.withIndent(' ').convert(comparisonAsMap)) + ..writeln('==== End of baseline comparison ===='); + return comparisonAsMap; +} + +Expando _benchmarkDeltas = Expando(); + +extension ScoreDeltaExtension on BenchmarkScore { + double? 
get deltaFromBaseline => _benchmarkDeltas[this]; +} + +extension ResultDeltaExtension on BenchmarkResults { + Map>> toJsonWithDeltas() { + return scores.map>>( + (String benchmarkName, List scores) { + return MapEntry>>( + benchmarkName, + scores.map>( + (BenchmarkScore score) { + final delta = _benchmarkDeltas[score]; + return { + ...score.toJson(), + if (delta != null) 'delta': delta, + }; + }, + ).toList(), + ); + }, + ); + } +} diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart index 5dfdab1b003..eb052867a9f 100644 --- a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart +++ b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart @@ -10,6 +10,8 @@ import 'package:web_benchmarks/server.dart'; import '../test_infra/common.dart'; import '../test_infra/project_root_directory.dart'; +import 'compare_benchmarks.dart'; +import 'utils.dart'; /// Runs the DevTools web benchmarks and reports the benchmark data. 
/// @@ -36,11 +38,36 @@ Future main(List args) async { final resultsAsJsonString = const JsonEncoder.withIndent(' ').convert(resultsAsMap); + if (benchmarkArgs.saveToFileLocation != null) { + final location = Uri.parse(benchmarkArgs.saveToFileLocation!); + File.fromUri(location) + ..createSync() + ..writeAsStringSync(resultsAsJsonString); + } + stdout ..writeln('==== Results ====') ..writeln(resultsAsJsonString) ..writeln('==== End of results ====') ..writeln(); + + final baselineSource = benchmarkArgs.baselineLocation; + if (baselineSource != null) { + final baselineFile = checkFileExists(baselineSource); + if (baselineFile != null) { + final baselineResults = BenchmarkResults.parse( + jsonDecode(baselineFile.readAsStringSync()), + ); + final testResults = BenchmarkResults.parse( + jsonDecode(resultsAsJsonString), + ); + compareBenchmarks( + baselineResults, + testResults, + baselineSource: baselineSource, + ); + } + } } class BenchmarkArgs { @@ -57,10 +84,18 @@ class BenchmarkArgs { bool get useWasm => argResults[_wasmFlag]; + String? get saveToFileLocation => argResults[_saveToFileOption]; + + String? get baselineLocation => argResults[_baselineOption]; + static const _browserFlag = 'browser'; static const _wasmFlag = 'wasm'; + static const _saveToFileOption = 'save-to-file'; + + static const _baselineOption = 'baseline'; + /// Builds an arg parser for DevTools benchmarks. static ArgParser _buildArgParser() { return ArgParser() @@ -71,6 +106,18 @@ class BenchmarkArgs { ..addFlag( _wasmFlag, help: 'Runs the benchmark tests with dart2wasm', + ) + ..addOption( + _saveToFileOption, + help: 'Saves the benchmark results to a JSON file at the given path.', + valueHelp: '/Users/me/Downloads/output.json', + ) + ..addOption( + _baselineOption, + help: 'The baseline benchmark data to compare this test run to. 
The ' + 'baseline file should be created by running this script with the ' + '$_saveToFileOption in a separate test run.', + valueHelp: '/Users/me/Downloads/baseline.json', ); } } diff --git a/packages/devtools_app/benchmark/scripts/utils.dart b/packages/devtools_app/benchmark/scripts/utils.dart new file mode 100644 index 00000000000..a4cbc99bead --- /dev/null +++ b/packages/devtools_app/benchmark/scripts/utils.dart @@ -0,0 +1,14 @@ +// Copyright 2023 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +import 'dart:io'; + +File? checkFileExists(String path) { + final testFile = File.fromUri(Uri.parse(path)); + if (!testFile.existsSync()) { + stdout.writeln('Could not locate file at $path.'); + return null; + } + return testFile; +} diff --git a/packages/devtools_app/pubspec.yaml b/packages/devtools_app/pubspec.yaml index 1b839ad86a1..0b7b31afbf6 100644 --- a/packages/devtools_app/pubspec.yaml +++ b/packages/devtools_app/pubspec.yaml @@ -79,7 +79,7 @@ dev_dependencies: mockito: ^5.4.1 stager: ^1.0.1 test: ^1.21.1 - web_benchmarks: ^1.0.0 + web_benchmarks: ^1.0.1 webkit_inspection_protocol: ">=0.5.0 <2.0.0" flutter: