Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add the ability to compare benchmark runs
  • Loading branch information
kenzieschmoll committed Dec 8, 2023
commit 79cb9a6d7b89aa62f860ebba2b95c03f39648c37
22 changes: 21 additions & 1 deletion packages/devtools_app/benchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,24 @@ The size benchmark must be ran by itself because it actually modifies the
`devtools_app/build` folder to create and measure the release build web bundle size.
If this test is run while other tests are running, it can affect the measurements
that the size benchmark test takes, and it can affect the DevTools build that
the other running tests are using with.
the other running tests are using.

## Adding a new benchmark test or test case

The tests are defined by "automators", which live in the `benchmark/test_infra/automators`
directory. To add a new test or test case, either modify an existing automator or add
a new one for a new screen. Follow existing examples in that directory for guidance.

## Comparing two benchmark test runs

To compare two benchmark runs, first run the benchmark tests once for each run
you want to compare, saving each set of results to its own file:
```sh
dart run benchmark/scripts/run_benchmarks.dart --save-to-file=baseline.json
dart run benchmark/scripts/run_benchmarks.dart --save-to-file=test.json
```

Then, to compare the benchmarks and calculate deltas, run:
```sh
dart run benchmark/scripts/compare_benchmarks.dart baseline.json test.json
```
126 changes: 126 additions & 0 deletions packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// Copyright 2023 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

import 'dart:convert';
import 'dart:io';

import 'package:collection/collection.dart';
import 'package:web_benchmarks/server.dart';

import 'utils.dart';

/// Command-line entry point for comparing two saved benchmark result files.
///
/// Expects exactly two arguments: the path to the baseline results file
/// followed by the path to the test results file. Both files are decoded as
/// JSON, parsed into [BenchmarkResults], and handed to [compareBenchmarks],
/// which prints the delta for each matching metric.
///
/// Throws an [Exception] if the argument count is wrong or if either file
/// cannot be found.
void main(List<String> args) {
  if (args.length != 2) {
    throw Exception(
      'Expected 2 arguments (<baseline-file>, <test-file>), but instead there '
      'were ${args.length}.',
    );
  }

  final baselineSource = args.first;
  final testSource = args.last;

  stdout.writeln('Comparing the following benchmark results:');
  stdout.writeln(' "$testSource" (test)');
  stdout.writeln(' "$baselineSource" (baseline)');

  // Look up both files before failing so that every missing path is reported
  // by [checkFileExists], not just the first one.
  final baselineFile = checkFileExists(baselineSource);
  final testFile = checkFileExists(testSource);
  if (baselineFile == null) {
    throw Exception('Cannot find baseline file $baselineSource');
  }
  if (testFile == null) {
    throw Exception('Cannot find test file $testSource');
  }

  // Reads [file] and parses its JSON content as benchmark results.
  BenchmarkResults load(File file) =>
      BenchmarkResults.parse(jsonDecode(file.readAsStringSync()));

  compareBenchmarks(
    load(baselineFile),
    load(testFile),
    baselineSource: baselineSource,
  );
}

/// Compares the [test] results against the [baseline] results and prints the
/// outcome to stdout.
///
/// For every benchmark in [test] that also exists in [baseline], each metric
/// with a matching baseline metric gets a delta (test value minus baseline
/// value) recorded in [_benchmarkDeltas]. Benchmarks or metrics that are
/// missing from the baseline are reported on stdout and skipped.
///
/// Once all deltas are recorded, [test] is printed as indented JSON via
/// `toJsonWithDeltas`. [baselineSource] is only used to label that output.
void compareBenchmarks(
  BenchmarkResults baseline,
  BenchmarkResults test, {
  required String baselineSource,
}) {
  stdout.writeln('Starting baseline comparison...');

  // Iterate over entries so the scores are available without re-indexing the
  // map (and without a null assertion).
  for (final entry in test.scores.entries) {
    final benchmarkName = entry.key;
    final testScores = entry.value;
    stdout.writeln('Comparing metrics for benchmark "$benchmarkName".');

    // Look up this benchmark in the baseline.
    final baselineScores = baseline.scores[benchmarkName];
    if (baselineScores == null) {
      stdout.writeln(
        'Baseline does not contain results for benchmark "$benchmarkName".',
      );
      continue;
    }

    for (final score in testScores) {
      // Look up this metric in the baseline.
      final baselineScore =
          baselineScores.firstWhereOrNull((s) => s.metric == score.metric);
      if (baselineScore == null) {
        stdout.writeln(
          'Baseline does not contain metric "${score.metric}" for '
          'benchmark "$benchmarkName".',
        );
        continue;
      }

      // Record the delta for this test score so it can be included when the
      // test results are serialized.
      _benchmarkDeltas[score] = (score.value - baselineScore.value).toDouble();
    }
  }
  stdout.writeln('Baseline comparison finished.');

  stdout
    ..writeln('==== Comparison with baseline $baselineSource ====')
    ..writeln(
      const JsonEncoder.withIndent(' ').convert(test.toJsonWithDeltas()),
    )
    ..writeln('==== End of baseline comparison ====');
}

/// Deltas from the baseline, keyed by the [BenchmarkScore] they belong to.
///
/// An [Expando] lets a delta be attached to a score object without adding a
/// field to [BenchmarkScore] itself. Entries are written by
/// [compareBenchmarks]. The variable is `final` because only its entries
/// change; the expando itself is never reassigned.
final _benchmarkDeltas = Expando<double>();

/// Exposes the delta recorded for a [BenchmarkScore].
extension ScoreDeltaExtension on BenchmarkScore {
  /// The delta recorded for this score in [_benchmarkDeltas], or `null` if
  /// no delta has been recorded for it.
  double? get deltaFromBaseline => _benchmarkDeltas[this];
}

/// Adds delta-aware JSON serialization to [BenchmarkResults].
extension ResultDeltaExtension on BenchmarkResults {
  /// Returns a JSON-encodable map from benchmark name to the list of encoded
  /// scores for that benchmark.
  ///
  /// Each encoded score is the score's own `toJson()` output, plus a `delta`
  /// entry when a delta has been recorded for that score in
  /// [_benchmarkDeltas].
  Map<String, List<Map<String, dynamic>>> toJsonWithDeltas() {
    // Encodes a single score, appending its delta only when one exists.
    Map<String, dynamic> encode(BenchmarkScore score) {
      final delta = _benchmarkDeltas[score];
      return <String, dynamic>{
        ...score.toJson(),
        if (delta != null) 'delta': delta,
      };
    }

    return <String, List<Map<String, dynamic>>>{
      for (final entry in scores.entries)
        entry.key: entry.value.map<Map<String, dynamic>>(encode).toList(),
    };
  }
}
42 changes: 42 additions & 0 deletions packages/devtools_app/benchmark/scripts/run_benchmarks.dart
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import 'package:web_benchmarks/server.dart';

import '../test_infra/common.dart';
import '../test_infra/project_root_directory.dart';
import 'compare_benchmarks.dart';
import 'utils.dart';

/// Runs the DevTools web benchmarks and reports the benchmark data.
///
Expand Down Expand Up @@ -48,6 +50,24 @@ Future<void> main(List<String> args) async {
..writeln(resultsAsJsonString)
..writeln('==== End of results ====')
..writeln();

final baselineSource = benchmarkArgs.baselineLocation;
if (baselineSource != null) {
final baselineFile = checkFileExists(baselineSource);
if (baselineFile != null) {
final baselineResults = BenchmarkResults.parse(
jsonDecode(baselineFile.readAsStringSync()),
);
final testResults = BenchmarkResults.parse(
jsonDecode(resultsAsJsonString),
);
compareBenchmarks(
baselineResults,
testResults,
baselineSource: baselineSource,
);
}
}
}

class BenchmarkArgs {
Expand All @@ -64,10 +84,18 @@ class BenchmarkArgs {

bool get useWasm => argResults[_wasmFlag];

String? get saveToFileLocation => argResults[_saveToFileOption];

String? get baselineLocation => argResults[_baselineOption];

static const _browserFlag = 'browser';

static const _wasmFlag = 'wasm';

static const _baselineOption = 'baseline';

static const _saveToFileOption = 'save-to-file';

/// Builds an arg parser for DevTools benchmarks.
static ArgParser _buildArgParser() {
return ArgParser()
Expand All @@ -78,6 +106,20 @@ class BenchmarkArgs {
..addFlag(
_wasmFlag,
help: 'Runs the benchmark tests with dart2wasm',
)
..addOption(
_saveToFileOption,
help: 'Saves the benchmark results to a JSON file at the given path.',
valueHelp: '/Users/me/Downloads/output.json',
)
..addOption(
_baselineOption,
help: 'The baseline benchmark data to compare this test run to. The '
'baseline file should be created by running this script with the '
'$_saveToFileOption in a separate test run.',
valueHelp: '/Users/me/Downloads/baseline.json',
);
}
}
// BenchmarkResults _averageBenchmarkResults(List<BenchmarkResults> results) {
// }
14 changes: 14 additions & 0 deletions packages/devtools_app/benchmark/scripts/utils.dart.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Copyright 2023 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

import 'dart:io';

/// Returns the [File] at [path] if it exists, or `null` otherwise.
///
/// [path] is treated as a plain filesystem path (absolute or relative to the
/// current working directory). When no file exists there, a message is
/// written to stdout before returning `null`.
File? checkFileExists(String path) {
  // Construct the file from the path directly. Parsing a filesystem path as a
  // URI misinterprets characters such as '#', '?', and '%', and breaks on
  // Windows drive-letter paths.
  final file = File(path);
  if (!file.existsSync()) {
    stdout.writeln('Could not locate file at $path.');
    return null;
  }
  return file;
}