Skip to content

Commit 11d4abe

Browse files
Add the ability to average benchmark runs (#6920)
1 parent 6dd6896 commit 11d4abe

File tree

3 files changed

+134
-29
lines changed

3 files changed

+134
-29
lines changed

packages/devtools_app/benchmark/devtools_benchmarks_test.dart

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -38,20 +38,16 @@ void main() {
3838
timeout: const Timeout(Duration(minutes: 10)),
3939
);
4040

41-
test(
42-
'Can compare web benchmarks',
43-
() {
44-
final benchmark1 = BenchmarkResults.parse(testBenchmarkResults1);
45-
final benchmark2 = BenchmarkResults.parse(testBenchmarkResults2);
46-
final comparison = compareBenchmarks(
47-
benchmark1,
48-
benchmark2,
49-
baselineSource: 'path/to/baseline',
50-
);
51-
expect(comparison, testBenchmarkComparison);
52-
},
53-
timeout: const Timeout(Duration(minutes: 10)),
54-
);
41+
test('Can compare web benchmarks', () {
42+
final benchmark1 = BenchmarkResults.parse(testBenchmarkResults1);
43+
final benchmark2 = BenchmarkResults.parse(testBenchmarkResults2);
44+
final comparison = compareBenchmarks(
45+
benchmark1,
46+
benchmark2,
47+
baselineSource: 'path/to/baseline',
48+
);
49+
expect(comparison, testBenchmarkComparison);
50+
});
5551

5652
// TODO(kenz): add tests that verify performance meets some expected threshold
5753
}

packages/devtools_app/benchmark/scripts/run_benchmarks.dart

Lines changed: 74 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,24 +15,39 @@ import 'utils.dart';
1515

1616
/// Runs the DevTools web benchmarks and reports the benchmark data.
1717
///
18-
/// Arguments:
19-
/// * --browser - runs the benchmark tests in the browser (non-headless mode)
20-
/// * --wasm - runs the benchmark tests with the dart2wasm compiler
21-
///
22-
/// See [BenchmarkArgs].
18+
/// To see available arguments, run this script with the `-h` flag.
2319
Future<void> main(List<String> args) async {
20+
if (args.isNotEmpty && args.first == '-h') {
21+
stdout.writeln(BenchmarkArgs._buildArgParser().usage);
22+
return;
23+
}
24+
2425
final benchmarkArgs = BenchmarkArgs(args);
26+
final benchmarkResults = <BenchmarkResults>[];
27+
for (var i = 0; i < benchmarkArgs.averageOf; i++) {
28+
stdout.writeln('Starting web benchmark tests (run #$i) ...');
29+
benchmarkResults.add(
30+
await serveWebBenchmark(
31+
benchmarkAppDirectory: projectRootDirectory(),
32+
entryPoint: 'benchmark/test_infra/client.dart',
33+
compilationOptions: CompilationOptions(useWasm: benchmarkArgs.useWasm),
34+
treeShakeIcons: false,
35+
initialPage: benchmarkInitialPage,
36+
headless: !benchmarkArgs.useBrowser,
37+
),
38+
);
39+
stdout.writeln('Web benchmark tests finished (run #$i).');
40+
}
2541

26-
stdout.writeln('Starting web benchmark tests...');
27-
final taskResult = await serveWebBenchmark(
28-
benchmarkAppDirectory: projectRootDirectory(),
29-
entryPoint: 'benchmark/test_infra/client.dart',
30-
compilationOptions: CompilationOptions(useWasm: benchmarkArgs.useWasm),
31-
treeShakeIcons: false,
32-
initialPage: benchmarkInitialPage,
33-
headless: !benchmarkArgs.useBrowser,
34-
);
35-
stdout.writeln('Web benchmark tests finished.');
42+
late final BenchmarkResults taskResult;
43+
if (benchmarkArgs.averageOf == 1) {
44+
taskResult = benchmarkResults.first;
45+
} else {
46+
stdout.writeln(
47+
'Taking the average of ${benchmarkResults.length} benchmark runs.',
48+
);
49+
taskResult = averageBenchmarkResults(benchmarkResults);
50+
}
3651

3752
final resultsAsMap = taskResult.toJson();
3853
final resultsAsJsonString =
@@ -84,6 +99,8 @@ class BenchmarkArgs {
8499

85100
bool get useWasm => argResults[_wasmFlag];
86101

102+
int get averageOf => int.parse(argResults[_averageOfOption]);
103+
87104
String? get saveToFileLocation => argResults[_saveToFileOption];
88105

89106
String? get baselineLocation => argResults[_baselineOption];
@@ -96,15 +113,19 @@ class BenchmarkArgs {
96113

97114
static const _baselineOption = 'baseline';
98115

116+
static const _averageOfOption = 'average-of';
117+
99118
/// Builds an arg parser for DevTools benchmarks.
100119
static ArgParser _buildArgParser() {
101120
return ArgParser()
102121
..addFlag(
103122
_browserFlag,
123+
negatable: false,
104124
help: 'Runs the benchmark tests in browser mode (not headless mode).',
105125
)
106126
..addFlag(
107127
_wasmFlag,
128+
negatable: false,
108129
help: 'Runs the benchmark tests with dart2wasm',
109130
)
110131
..addOption(
@@ -118,6 +139,44 @@ class BenchmarkArgs {
118139
'baseline file should be created by running this script with the '
119140
'$_saveToFileOption in a separate test run.',
120141
valueHelp: '/Users/me/Downloads/baseline.json',
142+
)
143+
..addOption(
144+
_averageOfOption,
145+
defaultsTo: '1',
146+
help: 'The number of times to run the benchmark. The returned results '
147+
'will be the average of all the benchmark runs when this value is '
148+
'greater than 1.',
149+
valueHelp: '5',
121150
);
122151
}
123152
}
153+
154+
// TODO(kenz): upstream the logic to average benchmarks into the
155+
// package:web_benchmarks
156+
157+
/// Computes the per-score mean across every benchmark run in [results].
///
/// The runs are first folded into a single running total with `sumWith`, and
/// each summed score value is then divided by the number of runs.
///
/// All elements of [results] are expected to share the same benchmark names
/// and metrics; `sumWith` is called with its default mismatch handling, which
/// throws an [Exception] when a later run lacks a benchmark or metric that is
/// present in the running total.
///
/// Throws an [Exception] if [results] is empty.
BenchmarkResults averageBenchmarkResults(List<BenchmarkResults> results) {
  if (results.isEmpty) {
    throw Exception('Cannot take average of empty list.');
  }

  // Seed the running total with the first run and add every later run to it.
  final summed = results.skip(1).fold<BenchmarkResults>(
    results.first,
    (runningTotal, run) => runningTotal.sumWith(run),
  );

  // Start from the JSON form of the summed results and overwrite each
  // 'value' entry in place with its averaged counterpart.
  final runCount = results.length;
  final averagedJson = summed.toJson();
  for (final entry in summed.scores.entries) {
    var index = 0;
    for (final summedScore in entry.value) {
      averagedJson[entry.key]![index]['value'] = summedScore.value / runCount;
      index++;
    }
  }
  return BenchmarkResults.parse(averagedJson);
}

packages/devtools_app/benchmark/scripts/utils.dart

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44

55
import 'dart:io';
66

7+
import 'package:collection/collection.dart';
8+
import 'package:web_benchmarks/server.dart';
9+
710
File? checkFileExists(String path) {
811
final testFile = File.fromUri(Uri.parse(path));
912
if (!testFile.existsSync()) {
@@ -12,3 +15,50 @@ File? checkFileExists(String path) {
1215
}
1316
return testFile;
1417
}
18+
19+
/// Arithmetic helpers for combining [BenchmarkResults] instances.
extension BenchmarkResultsExtension on BenchmarkResults {
  /// Adds the score values of [other] to the matching scores of this
  /// [BenchmarkResults] and returns the totals as a new [BenchmarkResults].
  ///
  /// Scores are matched by benchmark name and then by metric. Only the
  /// benchmarks and metrics present in this instance are visited.
  ///
  /// When a benchmark or metric from this instance has no counterpart in
  /// [other], an [Exception] is thrown if [throwExceptionOnMismatch] is true
  /// (the default). Otherwise the unmatched entry is skipped and keeps the
  /// value it has in this instance.
  BenchmarkResults sumWith(
    BenchmarkResults other, {
    bool throwExceptionOnMismatch = true,
  }) {
    // Start from this instance's JSON form and overwrite the 'value' of each
    // score that has a counterpart in [other].
    final totals = toJson();
    for (final entry in scores.entries) {
      final benchmark = entry.key;
      final otherScores = other.scores[benchmark];
      if (otherScores == null) {
        if (!throwExceptionOnMismatch) continue;
        throw Exception(
          'Cannot sum benchmarks because [other] is missing an entry for '
          'benchmark "$benchmark".',
        );
      }

      final ownScores = entry.value;
      for (var index = 0; index < ownScores.length; index++) {
        final score = ownScores[index];
        // The first score in [other] reporting the same metric, if any.
        final counterpart = otherScores.firstWhereOrNull(
          (candidate) => candidate.metric == score.metric,
        );
        if (counterpart != null) {
          totals[benchmark]![index]['value'] = score.value + counterpart.value;
        } else if (throwExceptionOnMismatch) {
          throw Exception(
            'Cannot sum benchmarks because benchmark "$benchmark" is missing '
            'a score for metric ${score.metric}.',
          );
        }
      }
    }
    return BenchmarkResults.parse(totals);
  }
}

0 commit comments

Comments
 (0)