Skip to content
Merged
4 changes: 4 additions & 0 deletions packages/web_benchmarks/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 1.1.0

* Adds `computeAverage` and `computeDelta` methods to support analysis of benchmark results.

## 1.0.1

* Adds `parse` constructors for the `BenchmarkResults` and `BenchmarkScore` classes.
Expand Down
43 changes: 43 additions & 0 deletions packages/web_benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,46 @@ app's code and assets. Additionally, the server communicates with the browser to
extract the performance traces.

[1]: https://github.com/flutter/packages/blob/master/packages/web_benchmarks/testing/web_benchmarks_test.dart

# Analyzing benchmark results

After running web benchmarks, you may want to analyze the results or compare
with the results from other benchmark runs. The `web_benchmarks` package
supports the following analysis operations:

* compute the delta between two benchmark results
* compute the average of a set of benchmark results

```dart
import 'dart:convert';
import 'dart:io';

import 'package:web_benchmarks/analysis.dart';

void main() {
// Locations of the benchmark result JSON files to analyze.
final baseline = '/path/to/benchmark_baseline.json';
final test1 = '/path/to/benchmark_test_1.json';
final test2 = '/path/to/benchmark_test_2.json';
final baselineFile = File.fromUri(Uri.parse(baseline));
final testFile1 = File.fromUri(Uri.parse(test1));
final testFile2 = File.fromUri(Uri.parse(test2));

// Read each file, decode its JSON content, and parse it into a
// [BenchmarkResults] object.
final baselineResults = BenchmarkResults.parse(
jsonDecode(baselineFile.readAsStringSync()),
);
final testResults1 = BenchmarkResults.parse(
jsonDecode(testFile1.readAsStringSync()),
);
final testResults2 = BenchmarkResults.parse(
jsonDecode(testFile2.readAsStringSync()),
);

// Compute the delta between [baselineResults] and [testResults1].
final Map<String, List<Map<String, Object?>>> delta = computeDelta(baselineResults, testResults1);
print(delta);

// Compute the average of [testResults1] and [testResults2].
final BenchmarkResults average = computeAverage(<BenchmarkResults>[testResults1, testResults2]);
print(average.toJson());
}
```
142 changes: 142 additions & 0 deletions packages/web_benchmarks/lib/analysis.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
// Copyright 2013 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

import 'package:collection/collection.dart';
import 'server.dart';

export 'src/benchmark_result.dart';

/// Computes the per-metric mean of all benchmark results in [results].
///
/// The results are combined pairwise with `_sumWith`, and every summed value
/// is then divided by the number of results. Each element of [results] is
/// expected to contain the same benchmark names and metrics; a mismatch
/// detected while summing raises an [Exception].
///
/// Throws an [Exception] if [results] is empty.
BenchmarkResults computeAverage(List<BenchmarkResults> results) {
  if (results.isEmpty) {
    throw Exception('Cannot take average of empty list.');
  }

  // Collapse the whole list into a single instance holding per-metric sums.
  // For a single-element list this is simply that element.
  final BenchmarkResults summed = results.reduce(
    (BenchmarkResults accumulated, BenchmarkResults next) =>
        accumulated._sumWith(next),
  );

  // Start from the JSON form of the sums and overwrite each value in place
  // with the corresponding mean.
  final Map<String, List<Map<String, Object?>>> averagedJson = summed.toJson();
  for (final MapEntry<String, List<BenchmarkScore>> entry
      in summed.scores.entries) {
    int index = 0;
    for (final BenchmarkScore score in entry.value) {
      final double mean = score.value / results.length;
      averagedJson[entry.key]![index][BenchmarkScore.valueKey] = mean;
      index++;
    }
  }
  return BenchmarkResults.parse(averagedJson);
}

/// Computes the delta between [test] and [baseline], and returns the results
/// as a JSON object where each benchmark score entry contains a new field
/// 'delta' with the metric value comparison.
///
/// The delta for a score is the [test] value minus the [baseline] value for
/// the score with the same benchmark name and metric. Scores in [test] that
/// have no matching benchmark or metric in [baseline] are returned without a
/// 'delta' field.
Map<String, List<Map<String, Object?>>> computeDelta(
  BenchmarkResults baseline,
  BenchmarkResults test,
) {
  for (final MapEntry<String, List<BenchmarkScore>> entry
      in test.scores.entries) {
    // Look up this benchmark in the baseline; null when it is absent.
    final List<BenchmarkScore>? baselineScores = baseline.scores[entry.key];

    for (final BenchmarkScore score in entry.value) {
      // Look up this metric in the baseline.
      final BenchmarkScore? baselineScore = baselineScores?.firstWhereOrNull(
        (BenchmarkScore s) => s.metric == score.metric,
      );

      // Always write the expando entry, using null when there is no baseline
      // match. The expando outlives this call, so skipping unmatched scores
      // would let a delta recorded by an earlier call on the same [test]
      // instance leak into this result.
      _benchmarkDeltas[score] = baselineScore == null
          ? null
          : (score.value - baselineScore.value).toDouble();
    }
  }
  return test._toJsonWithDeltas();
}

/// An expando to hold benchmark delta values computed during a [computeDelta]
/// operation.
///
/// Keys are the [BenchmarkScore] instances of the results under test; the
/// stored value is that score's delta against the baseline. Declared `final`
/// because the expando itself is never replaced, only its entries.
final Expando<double> _benchmarkDeltas = Expando<double>();

/// Private analysis helpers for [BenchmarkResults] used by this library.
extension _AnalysisExtension on BenchmarkResults {
  /// Returns the JSON representation of this [BenchmarkResults] instance with
  /// an added field 'delta' that contains the delta for this metric as computed
  /// by the [computeDelta] method.
  ///
  /// Scores that have no recorded delta are serialized without the 'delta'
  /// field.
  Map<String, List<Map<String, Object?>>> _toJsonWithDeltas() {
    return <String, List<Map<String, Object?>>>{
      for (final MapEntry<String, List<BenchmarkScore>> entry
          in scores.entries)
        entry.key: <Map<String, Object?>>[
          for (final BenchmarkScore score in entry.value)
            <String, Object?>{
              ...score.toJson(),
              if (_benchmarkDeltas[score] case final double delta)
                'delta': delta,
            },
        ],
    };
  }

  /// Sums this [BenchmarkResults] instance with [other] by adding the values
  /// of each matching benchmark score.
  ///
  /// Returns a [BenchmarkResults] object with the summed values.
  ///
  /// When [throwExceptionOnMismatch] is true (default), the set of benchmark
  /// names and metric names in [other] are expected to be identical to those in
  /// [scores], or else an [Exception] will be thrown. This covers both entries
  /// missing from [other] and entries that only exist in [other].
  ///
  /// When [throwExceptionOnMismatch] is false, scores without a match in
  /// [other] keep their current value and entries that only exist in [other]
  /// are ignored.
  BenchmarkResults _sumWith(
    BenchmarkResults other, {
    bool throwExceptionOnMismatch = true,
  }) {
    // Start from the JSON form of this instance and overwrite values in place.
    // NOTE(review): assumes toJson() lists each benchmark's scores in the same
    // order as [scores], since entries are addressed by index below.
    final Map<String, List<Map<String, Object?>>> sum = toJson();
    for (final String benchmark in scores.keys) {
      // Look up this benchmark in [other].
      final List<BenchmarkScore>? matchingBenchmark = other.scores[benchmark];
      if (matchingBenchmark == null) {
        if (throwExceptionOnMismatch) {
          throw Exception(
            'Cannot sum benchmarks because [other] is missing an entry for '
            'benchmark "$benchmark".',
          );
        }
        continue;
      }

      final List<BenchmarkScore> scoresForBenchmark = scores[benchmark]!;
      for (int i = 0; i < scoresForBenchmark.length; i++) {
        final BenchmarkScore score = scoresForBenchmark[i];
        // Look up this score in the [matchingBenchmark] from [other].
        final BenchmarkScore? matchingScore = matchingBenchmark
            .firstWhereOrNull((BenchmarkScore s) => s.metric == score.metric);
        if (matchingScore == null) {
          if (throwExceptionOnMismatch) {
            throw Exception(
              'Cannot sum benchmarks because benchmark "$benchmark" is missing '
              'a score for metric ${score.metric}.',
            );
          }
          continue;
        }

        final num sumScore = score.value + matchingScore.value;
        sum[benchmark]![i][BenchmarkScore.valueKey] = sumScore;
      }
    }

    // The loop above only detects entries missing from [other]. Check the
    // reverse direction afterwards so that pre-existing failures keep their
    // original messages and precedence.
    if (throwExceptionOnMismatch) {
      _throwIfOtherHasUnmatchedEntries(other);
    }
    return BenchmarkResults.parse(sum);
  }

  /// Throws an [Exception] if [other] contains a benchmark name, or a metric
  /// within a benchmark, that has no counterpart in [scores].
  void _throwIfOtherHasUnmatchedEntries(BenchmarkResults other) {
    for (final MapEntry<String, List<BenchmarkScore>> entry
        in other.scores.entries) {
      final List<BenchmarkScore>? ownScores = scores[entry.key];
      if (ownScores == null) {
        throw Exception(
          'Cannot sum benchmarks because [other] contains an entry for '
          'benchmark "${entry.key}" that has no counterpart.',
        );
      }
      for (final BenchmarkScore otherScore in entry.value) {
        final bool hasCounterpart = ownScores
            .any((BenchmarkScore s) => s.metric == otherScore.metric);
        if (!hasCounterpart) {
          throw Exception(
            'Cannot sum benchmarks because benchmark "${entry.key}" in '
            '[other] contains a score for metric ${otherScore.metric} that '
            'has no counterpart.',
          );
        }
      }
    }
  }
}
16 changes: 10 additions & 6 deletions packages/web_benchmarks/lib/src/benchmark_result.dart
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,17 @@ class BenchmarkScore {

/// Deserializes a JSON object to create a [BenchmarkScore] object.
factory BenchmarkScore.parse(Map<String, Object?> json) {
final String metric = json[_metricKey]! as String;
final double value = (json[_valueKey]! as num).toDouble();
final String metric = json[metricKey]! as String;
final double value = (json[valueKey]! as num).toDouble();
return BenchmarkScore(metric: metric, value: value);
}

static const String _metricKey = 'metric';
static const String _valueKey = 'value';
/// The key for the value [metric] in the [BenchmarkScore] JSON
/// representation.
static const String metricKey = 'metric';

/// The key for the value [value] in the [BenchmarkScore] JSON representation.
static const String valueKey = 'value';

/// The name of the metric that this score is categorized under.
///
Expand All @@ -34,8 +38,8 @@ class BenchmarkScore {
/// Serializes the benchmark metric to a JSON object.
Map<String, Object?> toJson() {
return <String, Object?>{
_metricKey: metric,
_valueKey: value,
metricKey: metric,
valueKey: value,
};
}
}
Expand Down
3 changes: 2 additions & 1 deletion packages/web_benchmarks/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@ name: web_benchmarks
description: A benchmark harness for performance-testing Flutter apps in Chrome.
repository: https://github.com/flutter/packages/tree/main/packages/web_benchmarks
issue_tracker: https://github.com/flutter/flutter/issues?q=is%3Aissue+is%3Aopen+label%3A%22p%3A+web_benchmarks%22
version: 1.0.1
version: 1.1.0

environment:
sdk: ">=3.2.0 <4.0.0"
flutter: ">=3.16.0"

dependencies:
collection: ^1.18.0
flutter:
sdk: flutter
flutter_test:
Expand Down
Loading