diff --git a/packages/web_benchmarks/CHANGELOG.md b/packages/web_benchmarks/CHANGELOG.md
index cb26fd04c5f..5641ddfb3ab 100644
--- a/packages/web_benchmarks/CHANGELOG.md
+++ b/packages/web_benchmarks/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 1.1.0
+
+* Adds `computeAverage` and `computeDelta` methods to support analysis of benchmark results.
+
 ## 1.0.1
 
 * Adds `parse` constructors for the `BenchmarkResults` and `BenchmarkScore` classes.
diff --git a/packages/web_benchmarks/README.md b/packages/web_benchmarks/README.md
index cc38fc92cea..ce98a22b194 100644
--- a/packages/web_benchmarks/README.md
+++ b/packages/web_benchmarks/README.md
@@ -13,3 +13,45 @@ app's code and assets. Additionally, the server communicates with the browser
 to extract the performance traces.
 
 [1]: https://github.com/flutter/packages/blob/master/packages/web_benchmarks/testing/web_benchmarks_test.dart
+
+# Analyzing benchmark results
+
+After running web benchmarks, you may want to analyze the results or compare
+them with the results of other benchmark runs. The `web_benchmarks` package
+supports the following analysis operations:
+
+* compute the delta between two benchmark results
+* compute the average of a set of benchmark results
+
+The following example demonstrates both operations:
+
+```dart
+import 'dart:convert';
+import 'dart:io';
+
+import 'package:web_benchmarks/analysis.dart';
+
+void main() {
+  final BenchmarkResults baselineResults =
+      _benchmarkResultsFromFile('/path/to/benchmark_baseline.json');
+  final BenchmarkResults testResults1 =
+      _benchmarkResultsFromFile('/path/to/benchmark_test_1.json');
+  final BenchmarkResults testResults2 =
+      _benchmarkResultsFromFile('/path/to/benchmark_test_2.json');
+
+  // Compute the delta between [baselineResults] and [testResults1].
+  final BenchmarkResults delta = computeDelta(baselineResults, testResults1);
+  stdout.writeln(delta.toJson());
+
+  // Compute the average of [testResults1] and [testResults2].
+  final BenchmarkResults average =
+      computeAverage(<BenchmarkResults>[testResults1, testResults2]);
+  stdout.writeln(average.toJson());
+}
+
+BenchmarkResults _benchmarkResultsFromFile(String path) {
+  final File file = File.fromUri(Uri.parse(path));
+  final Map<String, Object?> fileContentAsJson =
+      jsonDecode(file.readAsStringSync()) as Map<String, Object?>;
+  return BenchmarkResults.parse(fileContentAsJson);
+}
+```
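+
+Each score produced by `computeDelta` carries a `delta` value, which is
+included when the score is serialized to JSON. As a rough illustration (the
+numbers here are made up), a single-metric delta result might serialize as:
+
+```json
+{
+  "foo": [
+    {"metric": "preroll_frame.average", "value": 65.5, "delta": 5.0}
+  ]
+}
+```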
diff --git a/packages/web_benchmarks/example/analyze_example.dart b/packages/web_benchmarks/example/analyze_example.dart
new file mode 100644
index 00000000000..bf2721ac594
--- /dev/null
+++ b/packages/web_benchmarks/example/analyze_example.dart
@@ -0,0 +1,35 @@
+// Copyright 2013 The Flutter Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// #docregion analyze
+import 'dart:convert';
+import 'dart:io';
+
+import 'package:web_benchmarks/analysis.dart';
+
+void main() {
+  final BenchmarkResults baselineResults =
+      _benchmarkResultsFromFile('/path/to/benchmark_baseline.json');
+  final BenchmarkResults testResults1 =
+      _benchmarkResultsFromFile('/path/to/benchmark_test_1.json');
+  final BenchmarkResults testResults2 =
+      _benchmarkResultsFromFile('/path/to/benchmark_test_2.json');
+
+  // Compute the delta between [baselineResults] and [testResults1].
+  final BenchmarkResults delta = computeDelta(baselineResults, testResults1);
+  stdout.writeln(delta.toJson());
+
+  // Compute the average of [testResults1] and [testResults2].
+  final BenchmarkResults average =
+      computeAverage(<BenchmarkResults>[testResults1, testResults2]);
+  stdout.writeln(average.toJson());
+}
+
+BenchmarkResults _benchmarkResultsFromFile(String path) {
+  final File file = File.fromUri(Uri.parse(path));
+  final Map<String, Object?> fileContentAsJson =
+      jsonDecode(file.readAsStringSync()) as Map<String, Object?>;
+  return BenchmarkResults.parse(fileContentAsJson);
+}
+// #enddocregion analyze
diff --git a/packages/web_benchmarks/lib/analysis.dart b/packages/web_benchmarks/lib/analysis.dart
new file mode 100644
index 00000000000..73b5d6e78c5
--- /dev/null
+++ b/packages/web_benchmarks/lib/analysis.dart
@@ -0,0 +1,121 @@
+// Copyright 2013 The Flutter Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+import 'package:collection/collection.dart';
+
+import 'server.dart';
+
+export 'src/benchmark_result.dart';
+
+/// Returns the average of the benchmark results in [results].
+///
+/// Each element in [results] is expected to have identical benchmark names and
+/// metrics; otherwise, an [Exception] will be thrown.
+BenchmarkResults computeAverage(List<BenchmarkResults> results) {
+  if (results.isEmpty) {
+    throw ArgumentError('Cannot take average of empty list.');
+  }
+
+  final BenchmarkResults totalSum = results.reduce(
+    (BenchmarkResults sum, BenchmarkResults next) => sum._sumWith(next),
+  );
+
+  final BenchmarkResults average = totalSum;
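+  // Divide each summed score by the number of results to produce the
+  // average. Note that this overwrites the scores on the summed
+  // [BenchmarkResults] object in place.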
+  for (final String benchmark in totalSum.scores.keys) {
+    final List<BenchmarkScore> scoresForBenchmark =
+        totalSum.scores[benchmark]!;
+    for (int i = 0; i < scoresForBenchmark.length; i++) {
+      final BenchmarkScore score = scoresForBenchmark[i];
+      final double averageValue = score.value / results.length;
+      average.scores[benchmark]![i] =
+          BenchmarkScore(metric: score.metric, value: averageValue);
+    }
+  }
+  return average;
+}
+
+/// Computes the delta for each matching metric in [test] and [baseline], and
+/// returns a new [BenchmarkResults] object where each [BenchmarkScore] contains
+/// a [delta] value.
+BenchmarkResults computeDelta(
+  BenchmarkResults baseline,
+  BenchmarkResults test,
+) {
+  final Map<String, List<BenchmarkScore>> delta =
+      <String, List<BenchmarkScore>>{};
+  for (final String benchmarkName in test.scores.keys) {
+    final List<BenchmarkScore> testScores = test.scores[benchmarkName]!;
+    final List<BenchmarkScore>? baselineScores = baseline.scores[benchmarkName];
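+    // Pair each test score with the baseline score for the same metric.
+    // Metrics with no counterpart in [baseline] are given a null delta.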
+    delta[benchmarkName] = testScores.map(
+      (BenchmarkScore testScore) {
+        final BenchmarkScore? baselineScore = baselineScores?.firstWhereOrNull(
+            (BenchmarkScore s) => s.metric == testScore.metric);
+        return testScore._copyWith(
+          delta: baselineScore == null
+              ? null
+              : (testScore.value - baselineScore.value).toDouble(),
+        );
+      },
+    ).toList();
+  }
+  return BenchmarkResults(delta);
+}
+
+extension _AnalysisExtension on BenchmarkResults {
+  /// Sums this [BenchmarkResults] instance with [other] by adding the values
+  /// of each matching benchmark score.
+  ///
+  /// Returns a [BenchmarkResults] object with the summed values.
+  ///
+  /// When [throwExceptionOnMismatch] is true (default), the set of benchmark
+  /// names and metric names in [other] are expected to be identical to those in
+  /// [scores], or else an [Exception] will be thrown.
+  BenchmarkResults _sumWith(
+    BenchmarkResults other, {
+    bool throwExceptionOnMismatch = true,
+  }) {
+    final Map<String, List<BenchmarkScore>> sum =
+        <String, List<BenchmarkScore>>{};
+    for (final String benchmark in scores.keys) {
+      // Look up this benchmark in [other].
+      final List<BenchmarkScore>? matchingBenchmark = other.scores[benchmark];
+      if (matchingBenchmark == null) {
+        if (throwExceptionOnMismatch) {
+          throw Exception(
+            'Cannot sum benchmarks because [other] is missing an entry for '
+            'benchmark "$benchmark".',
+          );
+        }
+        continue;
+      }
+
+      final List<BenchmarkScore> scoresForBenchmark = scores[benchmark]!;
+      sum[benchmark] =
+          scoresForBenchmark.map((BenchmarkScore score) {
+        // Look up this score in the [matchingBenchmark] from [other].
+        final BenchmarkScore? matchingScore = matchingBenchmark
+            .firstWhereOrNull((BenchmarkScore s) => s.metric == score.metric);
+        if (matchingScore == null && throwExceptionOnMismatch) {
+          throw Exception(
+            'Cannot sum benchmarks because benchmark "$benchmark" is missing '
+            'a score for metric ${score.metric}.',
+          );
+        }
+        return score._copyWith(
+          value: matchingScore == null
+              ? score.value
+              : score.value + matchingScore.value,
+        );
+      }).toList();
+    }
+    return BenchmarkResults(sum);
+  }
+}
+
+extension _CopyExtension on BenchmarkScore {
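+  /// Creates a copy of this [BenchmarkScore] with the given fields replaced.
+  ///
+  /// Passing null (or omitting a field) keeps the existing value, so this
+  /// cannot be used to clear a field back to null.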
+  BenchmarkScore _copyWith({String? metric, num? value, num? delta}) =>
+      BenchmarkScore(
+        metric: metric ?? this.metric,
+        value: value ?? this.value,
+        delta: delta ?? this.delta,
+      );
+}
diff --git a/packages/web_benchmarks/lib/src/benchmark_result.dart b/packages/web_benchmarks/lib/src/benchmark_result.dart
index 445dc939489..ac80c747345 100644
--- a/packages/web_benchmarks/lib/src/benchmark_result.dart
+++ b/packages/web_benchmarks/lib/src/benchmark_result.dart
@@ -10,17 +10,26 @@ class BenchmarkScore {
   BenchmarkScore({
     required this.metric,
     required this.value,
+    this.delta,
   });
 
   /// Deserializes a JSON object to create a [BenchmarkScore] object.
   factory BenchmarkScore.parse(Map<String, Object?> json) {
-    final String metric = json[_metricKey]! as String;
-    final double value = (json[_valueKey]! as num).toDouble();
-    return BenchmarkScore(metric: metric, value: value);
+    final String metric = json[metricKey]! as String;
+    final double value = (json[valueKey]! as num).toDouble();
+    final num? delta = json[deltaKey] as num?;
+    return BenchmarkScore(metric: metric, value: value, delta: delta);
   }
 
-  static const String _metricKey = 'metric';
-  static const String _valueKey = 'value';
+  /// The key for the value [metric] in the [BenchmarkScore] JSON
+  /// representation.
+  static const String metricKey = 'metric';
+
+  /// The key for the value [value] in the [BenchmarkScore] JSON representation.
+  static const String valueKey = 'value';
+
+  /// The key for the value [delta] in the [BenchmarkScore] JSON representation.
+  static const String deltaKey = 'delta';
 
   /// The name of the metric that this score is categorized under.
   ///
@@ -31,11 +40,18 @@ class BenchmarkScore {
   /// The result of measuring a particular metric in this benchmark run.
   final num value;
 
+  /// Optional delta value describing the difference between this metric's score
+  /// and the score of a matching metric from another [BenchmarkResults].
+  ///
+  /// This value may be assigned by the [computeDelta] analysis method.
+  final num? delta;
+
   /// Serializes the benchmark metric to a JSON object.
   Map<String, Object?> toJson() {
     return <String, Object?>{
-      _metricKey: metric,
-      _valueKey: value,
+      metricKey: metric,
+      valueKey: value,
+      if (delta != null) deltaKey: delta,
     };
   }
 }
@@ -53,7 +69,7 @@ class BenchmarkResults {
       final List<BenchmarkScore> scores = (json[key]! as List<Object?>)
           .cast<Map<String, Object?>>()
           .map(BenchmarkScore.parse)
-          .toList();
+          .toList(growable: false);
       results[key] = scores;
     }
     return BenchmarkResults(results);
diff --git a/packages/web_benchmarks/pubspec.yaml b/packages/web_benchmarks/pubspec.yaml
index 7c090ca62ad..117d8dd246e 100644
--- a/packages/web_benchmarks/pubspec.yaml
+++ b/packages/web_benchmarks/pubspec.yaml
@@ -2,13 +2,14 @@ name: web_benchmarks
 description: A benchmark harness for performance-testing Flutter apps in Chrome.
 repository: https://github.com/flutter/packages/tree/main/packages/web_benchmarks
 issue_tracker: https://github.com/flutter/flutter/issues?q=is%3Aissue+is%3Aopen+label%3A%22p%3A+web_benchmarks%22
-version: 1.0.1
+version: 1.1.0
 
 environment:
   sdk: ">=3.2.0 <4.0.0"
   flutter: ">=3.16.0"
 
 dependencies:
+  collection: ^1.18.0
   flutter:
     sdk: flutter
   flutter_test:
diff --git a/packages/web_benchmarks/test/src/analysis_test.dart b/packages/web_benchmarks/test/src/analysis_test.dart
new file mode 100644
index 00000000000..852cb4c6408
--- /dev/null
+++ b/packages/web_benchmarks/test/src/analysis_test.dart
@@ -0,0 +1,330 @@
+// Copyright 2013 The Flutter Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+import 'package:flutter_test/flutter_test.dart';
+import 'package:web_benchmarks/analysis.dart';
+
+void main() {
+  group('averageBenchmarkResults', () {
+    test('succeeds for identical benchmark names and metrics', () {
+      final BenchmarkResults result1 = BenchmarkResults(
+        <String, List<BenchmarkScore>>{
+          'foo': <BenchmarkScore>[
+            BenchmarkScore(metric: 'foo.bar', value: 6),
+            BenchmarkScore(metric: 'foo.baz', value: 10),
+          ],
+          'bar': <BenchmarkScore>[
+            BenchmarkScore(metric: 'bar.foo', value: 2.4),
+          ],
+        },
+      );
+      final BenchmarkResults result2 = BenchmarkResults(
+        <String, List<BenchmarkScore>>{
+          'foo': <BenchmarkScore>[
+            BenchmarkScore(metric: 'foo.bar', value: 4),
+            BenchmarkScore(metric: 'foo.baz', value: 10),
+          ],
+          'bar': <BenchmarkScore>[
+            BenchmarkScore(metric: 'bar.foo', value: 1.2),
+          ],
+        },
+      );
+      final BenchmarkResults average =
+          computeAverage(<BenchmarkResults>[result1, result2]);
+      expect(
+        average.toJson(),
+        <String, List<Map<String, Object?>>>{
+          'foo': <Map<String, Object?>>[
+            <String, Object?>{'metric': 'foo.bar', 'value': 5},
+            <String, Object?>{'metric': 'foo.baz', 'value': 10},
+          ],
+          'bar': <Map<String, Object?>>[
+            <String, Object?>{
+              'metric': 'bar.foo',
+              'value': 1.7999999999999998,
+            },
+          ],
+        },
+      );
+    });
+
+    test('fails for mismatched benchmark names', () {
+      final BenchmarkResults result1 = BenchmarkResults(
+        <String, List<BenchmarkScore>>{
+          'foo': <BenchmarkScore>[BenchmarkScore(metric: 'foo.bar', value: 6)],
+        },
+      );
+      final BenchmarkResults result2 = BenchmarkResults(
+        <String, List<BenchmarkScore>>{
+          'foo1': <BenchmarkScore>[BenchmarkScore(metric: 'foo.bar', value: 4)],
+        },
+      );
+      expect(
+        () {
+          computeAverage(<BenchmarkResults>[result1, result2]);
+        },
+        throwsException,
+      );
+    });
+
+    test('fails for mismatched benchmark metrics', () {
+      final BenchmarkResults result1 = BenchmarkResults(
+        <String, List<BenchmarkScore>>{
+          'foo': <BenchmarkScore>[BenchmarkScore(metric: 'foo.bar', value: 6)],
+        },
+      );
+      final BenchmarkResults result2 = BenchmarkResults(
+        <String, List<BenchmarkScore>>{
+          'foo': <BenchmarkScore>[BenchmarkScore(metric: 'foo.boo', value: 4)],
+        },
+      );
+      expect(
+        () {
+          computeAverage(<BenchmarkResults>[result1, result2]);
+        },
+        throwsException,
+      );
+    });
+  });
+
+  test('computeDelta', () {
+    final BenchmarkResults benchmark1 =
+        BenchmarkResults.parse(testBenchmarkResults1);
+    final BenchmarkResults benchmark2 =
+        BenchmarkResults.parse(testBenchmarkResults2);
+    final BenchmarkResults delta = computeDelta(benchmark1, benchmark2);
+    expect(delta.toJson(), expectedBenchmarkDelta);
+  });
+}
+
+final Map<String, List<Map<String, Object?>>> testBenchmarkResults1 =
+    <String, List<Map<String, Object?>>>{
+  'foo': <Map<String, Object?>>[
+    <String, Object?>{'metric': 'preroll_frame.average', 'value': 60.5},
+    <String, Object?>{'metric': 'preroll_frame.outlierAverage', 'value': 1400},
+    <String, Object?>{'metric': 'preroll_frame.outlierRatio', 'value': 20.2},
+    <String, Object?>{'metric': 'preroll_frame.noise', 'value': 0.85},
+    <String, Object?>{'metric': 'apply_frame.average', 'value': 80.0},
+    <String, Object?>{'metric': 'apply_frame.outlierAverage', 'value': 200.6},
+    <String, Object?>{'metric': 'apply_frame.outlierRatio', 'value': 2.5},
+    <String, Object?>{'metric': 'apply_frame.noise', 'value': 0.4},
+    <String, Object?>{'metric': 'drawFrameDuration.average', 'value': 2058.9},
+    <String, Object?>{
+      'metric': 'drawFrameDuration.outlierAverage',
+      'value': 24000,
+    },
+    <String, Object?>{
+      'metric': 'drawFrameDuration.outlierRatio',
+      'value': 12.05,
+    },
+    <String, Object?>{'metric': 'drawFrameDuration.noise', 'value': 0.34},
+    <String, Object?>{'metric': 'totalUiFrame.average', 'value': 4166},
+  ],
+  'bar': <Map<String, Object?>>[
+    <String, Object?>{'metric': 'preroll_frame.average', 'value': 60.5},
+    <String, Object?>{'metric': 'preroll_frame.outlierAverage', 'value': 1400},
+    <String, Object?>{'metric': 'preroll_frame.outlierRatio', 'value': 20.2},
+    <String, Object?>{'metric': 'preroll_frame.noise', 'value': 0.85},
+    <String, Object?>{'metric': 'apply_frame.average', 'value': 80.0},
+    <String, Object?>{'metric': 'apply_frame.outlierAverage', 'value': 200.6},
+    <String, Object?>{'metric': 'apply_frame.outlierRatio', 'value': 2.5},
+    <String, Object?>{'metric': 'apply_frame.noise', 'value': 0.4},
+    <String, Object?>{'metric': 'drawFrameDuration.average', 'value': 2058.9},
+    <String, Object?>{
+      'metric': 'drawFrameDuration.outlierAverage',
+      'value': 24000,
+    },
+    <String, Object?>{
+      'metric': 'drawFrameDuration.outlierRatio',
+      'value': 12.05,
+    },
+    <String, Object?>{'metric': 'drawFrameDuration.noise', 'value': 0.34},
+    <String, Object?>{'metric': 'totalUiFrame.average', 'value': 4166},
+  ],
+};
+
+final Map<String, List<Map<String, Object?>>> testBenchmarkResults2 =
+    <String, List<Map<String, Object?>>>{
+  'foo': <Map<String, Object?>>[
+    <String, Object?>{'metric': 'preroll_frame.average', 'value': 65.5},
+    <String, Object?>{'metric': 'preroll_frame.outlierAverage', 'value': 1410},
+    <String, Object?>{'metric': 'preroll_frame.outlierRatio', 'value': 20.0},
+    <String, Object?>{'metric': 'preroll_frame.noise', 'value': 1.5},
+    <String, Object?>{'metric': 'apply_frame.average', 'value': 50.0},
+    <String, Object?>{'metric': 'apply_frame.outlierAverage', 'value': 100.0},
+    <String, Object?>{'metric': 'apply_frame.outlierRatio', 'value': 2.55},
+    <String, Object?>{'metric': 'apply_frame.noise', 'value': 0.9},
+    <String, Object?>{'metric': 'drawFrameDuration.average', 'value': 2000.0},
+    <String, Object?>{
+      'metric': 'drawFrameDuration.outlierAverage',
+      'value': 20000,
+    },
+    <String, Object?>{
+      'metric': 'drawFrameDuration.outlierRatio',
+      'value': 11.05,
+    },
+    <String, Object?>{'metric': 'drawFrameDuration.noise', 'value': 1.34},
+    <String, Object?>{'metric': 'totalUiFrame.average', 'value': 4150},
+  ],
+  'bar': <Map<String, Object?>>[
+    <String, Object?>{'metric': 'preroll_frame.average', 'value': 65.5},
+    <String, Object?>{'metric': 'preroll_frame.outlierAverage', 'value': 1410},
+    <String, Object?>{'metric': 'preroll_frame.outlierRatio', 'value': 20.0},
+    <String, Object?>{'metric': 'preroll_frame.noise', 'value': 1.5},
+    <String, Object?>{'metric': 'apply_frame.average', 'value': 50.0},
+    <String, Object?>{'metric': 'apply_frame.outlierAverage', 'value': 100.0},
+    <String, Object?>{'metric': 'apply_frame.outlierRatio', 'value': 2.55},
+    <String, Object?>{'metric': 'apply_frame.noise', 'value': 0.9},
+    <String, Object?>{'metric': 'drawFrameDuration.average', 'value': 2000.0},
+    <String, Object?>{
+      'metric': 'drawFrameDuration.outlierAverage',
+      'value': 20000,
+    },
+    <String, Object?>{
+      'metric': 'drawFrameDuration.outlierRatio',
+      'value': 11.05,
+    },
+    <String, Object?>{'metric': 'drawFrameDuration.noise', 'value': 1.34},
+    <String, Object?>{'metric': 'totalUiFrame.average', 'value': 4150},
+  ],
+};
+
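+// The expected deltas below are the raw results of double subtraction, so
+// some values carry floating point error (e.g. -0.1999999999999993 rather
+// than -0.2).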
+final Map<String, List<Map<String, Object?>>> expectedBenchmarkDelta =
+    <String, List<Map<String, Object?>>>{
+  'foo': <Map<String, Object?>>[
+    <String, Object?>{
+      'metric': 'preroll_frame.average',
+      'value': 65.5,
+      'delta': 5.0,
+    },
+    <String, Object?>{
+      'metric': 'preroll_frame.outlierAverage',
+      'value': 1410.0,
+      'delta': 10.0,
+    },
+    <String, Object?>{
+      'metric': 'preroll_frame.outlierRatio',
+      'value': 20.0,
+      'delta': -0.1999999999999993,
+    },
+    <String, Object?>{
+      'metric': 'preroll_frame.noise',
+      'value': 1.5,
+      'delta': 0.65,
+    },
+    <String, Object?>{
+      'metric': 'apply_frame.average',
+      'value': 50.0,
+      'delta': -30.0,
+    },
+    <String, Object?>{
+      'metric': 'apply_frame.outlierAverage',
+      'value': 100.0,
+      'delta': -100.6,
+    },
+    <String, Object?>{
+      'metric': 'apply_frame.outlierRatio',
+      'value': 2.55,
+      'delta': 0.04999999999999982,
+    },
+    <String, Object?>{
+      'metric': 'apply_frame.noise',
+      'value': 0.9,
+      'delta': 0.5,
+    },
+    <String, Object?>{
+      'metric': 'drawFrameDuration.average',
+      'value': 2000.0,
+      'delta': -58.90000000000009,
+    },
+    <String, Object?>{
+      'metric': 'drawFrameDuration.outlierAverage',
+      'value': 20000.0,
+      'delta': -4000.0,
+    },
+    <String, Object?>{
+      'metric': 'drawFrameDuration.outlierRatio',
+      'value': 11.05,
+      'delta': -1.0,
+    },
+    <String, Object?>{
+      'metric': 'drawFrameDuration.noise',
+      'value': 1.34,
+      'delta': 1.0,
+    },
+    <String, Object?>{
+      'metric': 'totalUiFrame.average',
+      'value': 4150.0,
+      'delta': -16.0,
+    },
+  ],
+  'bar': <Map<String, Object?>>[
+    <String, Object?>{
+      'metric': 'preroll_frame.average',
+      'value': 65.5,
+      'delta': 5.0,
+    },
+    <String, Object?>{
+      'metric': 'preroll_frame.outlierAverage',
+      'value': 1410.0,
+      'delta': 10.0,
+    },
+    <String, Object?>{
+      'metric': 'preroll_frame.outlierRatio',
+      'value': 20.0,
+      'delta': -0.1999999999999993,
+    },
+    <String, Object?>{
+      'metric': 'preroll_frame.noise',
+      'value': 1.5,
+      'delta': 0.65,
+    },
+    <String, Object?>{
+      'metric': 'apply_frame.average',
+      'value': 50.0,
+      'delta': -30.0,
+    },
+    <String, Object?>{
+      'metric': 'apply_frame.outlierAverage',
+      'value': 100.0,
+      'delta': -100.6,
+    },
+    <String, Object?>{
+      'metric': 'apply_frame.outlierRatio',
+      'value': 2.55,
+      'delta': 0.04999999999999982,
+    },
+    <String, Object?>{
+      'metric': 'apply_frame.noise',
+      'value': 0.9,
+      'delta': 0.5,
+    },
+    <String, Object?>{
+      'metric': 'drawFrameDuration.average',
+      'value': 2000.0,
+      'delta': -58.90000000000009,
+    },
+    <String, Object?>{
+      'metric': 'drawFrameDuration.outlierAverage',
+      'value': 20000.0,
+      'delta': -4000.0,
+    },
+    <String, Object?>{
+      'metric': 'drawFrameDuration.outlierRatio',
+      'value': 11.05,
+      'delta': -1.0,
+    },
+    <String, Object?>{
+      'metric': 'drawFrameDuration.noise',
+      'value': 1.34,
+      'delta': 1.0,
+    },
+    <String, Object?>{
+      'metric': 'totalUiFrame.average',
+      'value': 4150.0,
+      'delta': -16.0,
+    },
+  ],
+};
diff --git a/packages/web_benchmarks/test/src/benchmark_result_test.dart b/packages/web_benchmarks/test/src/benchmark_result_test.dart
index 18765d00646..65adee8556a 100644
--- a/packages/web_benchmarks/test/src/benchmark_result_test.dart
+++ b/packages/web_benchmarks/test/src/benchmark_result_test.dart
@@ -10,8 +10,8 @@ void main() {
   test('$BenchmarkResults', () {
     final Map<String, Object?> data = <String, Object?>{
       'foo': <Map<String, Object?>>[
-        <String, Object?>{'metric': 'foo.bar', 'value': 12.34},
-        <String, Object?>{'metric': 'foo.baz', 'value': 10},
+        <String, Object?>{'metric': 'foo.bar', 'value': 12.34, 'delta': -0.2},
+        <String, Object?>{'metric': 'foo.baz', 'value': 10, 'delta': 3.3},
       ],
       'bar': <Map<String, Object?>>[
         <String, Object?>{'metric': 'bar.foo', 'value': 1.23},
@@ -27,11 +27,14 @@ void main() {
     expect(fooBenchmarks.length, 2);
     expect(fooBenchmarks[0].metric, 'foo.bar');
     expect(fooBenchmarks[0].value, 12.34);
+    expect(fooBenchmarks[0].delta, -0.2);
     expect(fooBenchmarks[1].metric, 'foo.baz');
     expect(fooBenchmarks[1].value, 10);
+    expect(fooBenchmarks[1].delta, 3.3);
     expect(barBenchmarks.length, 1);
     expect(barBenchmarks[0].metric, 'bar.foo');
     expect(barBenchmarks[0].value, 1.23);
+    expect(barBenchmarks[0].delta, isNull);
 
     expect(benchmarkResults.toJson(), data);
   });
@@ -39,12 +42,14 @@ void main() {
   test('$BenchmarkScore', () {
     final Map<String, Object?> data = <String, Object?>{
       'metric': 'foo',
-      'value': 1.234
+      'value': 1.234,
+      'delta': -0.4,
     };
 
     final BenchmarkScore score = BenchmarkScore.parse(data);
     expect(score.metric, 'foo');
     expect(score.value, 1.234);
+    expect(score.delta, -0.4);
     expect(score.toJson(), data);
   });