diff --git a/packages/devtools_app/benchmark/README.md b/packages/devtools_app/benchmark/README.md index ab06b1776de..195c3e9a569 100644 --- a/packages/devtools_app/benchmark/README.md +++ b/packages/devtools_app/benchmark/README.md @@ -21,7 +21,6 @@ All of the commands below should be run from the `packages/devtools_app` directo To run the performance benchmark tests locally, run: ```sh dart run benchmark/scripts/run_benchmarks.dart -dart run benchmark/run_benchmarks.dart ``` To run the test that verifies we can run benchmark tests, run: @@ -48,4 +47,18 @@ the other running tests are using. The tests are defined by "automators", which live in the `benchmark/test_infra/automators` directory. To add a new test or test case, either modify an existing automator or add -a new one for a new screen. Follow existing examples in that directory for guidance. \ No newline at end of file +a new one for a new screen. Follow existing examples in that directory for guidance. + +## Comparing two benchmark test runs + +In order to compare two different benchmark runs, you first need to run the benchmark +tests and save the results to a file: +```sh +dart run benchmark/scripts/run_benchmarks.dart --save-to-file=baseline.json +dart run benchmark/scripts/run_benchmarks.dart --save-to-file=test.json +``` + +Then, to compare the benchmarks and calculate deltas, run: +```sh +dart run benchmark/scripts/compare_benchmarks.dart baseline.json test.json +``` diff --git a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart index 35d687a7261..14cdcf5d6eb 100644 --- a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart +++ b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart @@ -11,6 +11,7 @@ import 'dart:io'; import 'package:test/test.dart'; import 'package:web_benchmarks/server.dart'; +import 'scripts/compare_benchmarks.dart'; import 'test_infra/common.dart'; import 
'test_infra/project_root_directory.dart'; @@ -37,6 +38,21 @@ void main() { timeout: const Timeout(Duration(minutes: 10)), ); + test( + 'Can compare web benchmarks', + () { + final benchmark1 = BenchmarkResults.parse(testBenchmarkResults1); + final benchmark2 = BenchmarkResults.parse(testBenchmarkResults2); + final comparison = compareBenchmarks( + benchmark1, + benchmark2, + baselineSource: 'path/to/baseline', + ); + expect(comparison, testBenchmarkComparison); + }, + timeout: const Timeout(Duration(minutes: 10)), + ); + // TODO(kenz): add tests that verify performance meets some expected threshold } @@ -86,3 +102,134 @@ Future _runBenchmarks({bool useWasm = false}) async { isA(), ); } + +final testBenchmarkResults1 = { + 'foo': [ + {'metric': 'preroll_frame.average', 'value': 60.5}, + {'metric': 'preroll_frame.outlierAverage', 'value': 1400}, + {'metric': 'preroll_frame.outlierRatio', 'value': 20.2}, + {'metric': 'preroll_frame.noise', 'value': 0.85}, + {'metric': 'apply_frame.average', 'value': 80.0}, + {'metric': 'apply_frame.outlierAverage', 'value': 200.6}, + {'metric': 'apply_frame.outlierRatio', 'value': 2.5}, + {'metric': 'apply_frame.noise', 'value': 0.4}, + {'metric': 'drawFrameDuration.average', 'value': 2058.9}, + {'metric': 'drawFrameDuration.outlierAverage', 'value': 24000}, + {'metric': 'drawFrameDuration.outlierRatio', 'value': 12.05}, + {'metric': 'drawFrameDuration.noise', 'value': 0.34}, + {'metric': 'totalUiFrame.average', 'value': 4166}, + ], + 'bar': [ + {'metric': 'preroll_frame.average', 'value': 60.5}, + {'metric': 'preroll_frame.outlierAverage', 'value': 1400}, + {'metric': 'preroll_frame.outlierRatio', 'value': 20.2}, + {'metric': 'preroll_frame.noise', 'value': 0.85}, + {'metric': 'apply_frame.average', 'value': 80.0}, + {'metric': 'apply_frame.outlierAverage', 'value': 200.6}, + {'metric': 'apply_frame.outlierRatio', 'value': 2.5}, + {'metric': 'apply_frame.noise', 'value': 0.4}, + {'metric': 'drawFrameDuration.average', 'value': 
2058.9}, + {'metric': 'drawFrameDuration.outlierAverage', 'value': 24000}, + {'metric': 'drawFrameDuration.outlierRatio', 'value': 12.05}, + {'metric': 'drawFrameDuration.noise', 'value': 0.34}, + {'metric': 'totalUiFrame.average', 'value': 4166}, + ], +}; + +final testBenchmarkResults2 = { + 'foo': [ + {'metric': 'preroll_frame.average', 'value': 65.5}, + {'metric': 'preroll_frame.outlierAverage', 'value': 1410}, + {'metric': 'preroll_frame.outlierRatio', 'value': 20.0}, + {'metric': 'preroll_frame.noise', 'value': 1.5}, + {'metric': 'apply_frame.average', 'value': 50.0}, + {'metric': 'apply_frame.outlierAverage', 'value': 100.0}, + {'metric': 'apply_frame.outlierRatio', 'value': 2.55}, + {'metric': 'apply_frame.noise', 'value': 0.9}, + {'metric': 'drawFrameDuration.average', 'value': 2000.0}, + {'metric': 'drawFrameDuration.outlierAverage', 'value': 20000}, + {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05}, + {'metric': 'drawFrameDuration.noise', 'value': 1.34}, + {'metric': 'totalUiFrame.average', 'value': 4150}, + ], + 'bar': [ + {'metric': 'preroll_frame.average', 'value': 65.5}, + {'metric': 'preroll_frame.outlierAverage', 'value': 1410}, + {'metric': 'preroll_frame.outlierRatio', 'value': 20.0}, + {'metric': 'preroll_frame.noise', 'value': 1.5}, + {'metric': 'apply_frame.average', 'value': 50.0}, + {'metric': 'apply_frame.outlierAverage', 'value': 100.0}, + {'metric': 'apply_frame.outlierRatio', 'value': 2.55}, + {'metric': 'apply_frame.noise', 'value': 0.9}, + {'metric': 'drawFrameDuration.average', 'value': 2000.0}, + {'metric': 'drawFrameDuration.outlierAverage', 'value': 20000}, + {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05}, + {'metric': 'drawFrameDuration.noise', 'value': 1.34}, + {'metric': 'totalUiFrame.average', 'value': 4150}, + ], +}; + +final testBenchmarkComparison = { + 'foo': [ + {'metric': 'preroll_frame.average', 'value': 65.5, 'delta': 5.0}, + {'metric': 'preroll_frame.outlierAverage', 'value': 1410.0, 'delta': 
10.0}, + { + 'metric': 'preroll_frame.outlierRatio', + 'value': 20.0, + 'delta': -0.1999999999999993, + }, + {'metric': 'preroll_frame.noise', 'value': 1.5, 'delta': 0.65}, + {'metric': 'apply_frame.average', 'value': 50.0, 'delta': -30.0}, + {'metric': 'apply_frame.outlierAverage', 'value': 100.0, 'delta': -100.6}, + { + 'metric': 'apply_frame.outlierRatio', + 'value': 2.55, + 'delta': 0.04999999999999982, + }, + {'metric': 'apply_frame.noise', 'value': 0.9, 'delta': 0.5}, + { + 'metric': 'drawFrameDuration.average', + 'value': 2000.0, + 'delta': -58.90000000000009, + }, + { + 'metric': 'drawFrameDuration.outlierAverage', + 'value': 20000.0, + 'delta': -4000.0, + }, + {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05, 'delta': -1.0}, + {'metric': 'drawFrameDuration.noise', 'value': 1.34, 'delta': 1.0}, + {'metric': 'totalUiFrame.average', 'value': 4150.0, 'delta': -16.0}, + ], + 'bar': [ + {'metric': 'preroll_frame.average', 'value': 65.5, 'delta': 5.0}, + {'metric': 'preroll_frame.outlierAverage', 'value': 1410.0, 'delta': 10.0}, + { + 'metric': 'preroll_frame.outlierRatio', + 'value': 20.0, + 'delta': -0.1999999999999993, + }, + {'metric': 'preroll_frame.noise', 'value': 1.5, 'delta': 0.65}, + {'metric': 'apply_frame.average', 'value': 50.0, 'delta': -30.0}, + {'metric': 'apply_frame.outlierAverage', 'value': 100.0, 'delta': -100.6}, + { + 'metric': 'apply_frame.outlierRatio', + 'value': 2.55, + 'delta': 0.04999999999999982, + }, + {'metric': 'apply_frame.noise', 'value': 0.9, 'delta': 0.5}, + { + 'metric': 'drawFrameDuration.average', + 'value': 2000.0, + 'delta': -58.90000000000009, + }, + { + 'metric': 'drawFrameDuration.outlierAverage', + 'value': 20000.0, + 'delta': -4000.0, + }, + {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05, 'delta': -1.0}, + {'metric': 'drawFrameDuration.noise', 'value': 1.34, 'delta': 1.0}, + {'metric': 'totalUiFrame.average', 'value': 4150.0, 'delta': -16.0}, + ], +}; diff --git 
a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart new file mode 100644 index 00000000000..234ef9a0d87 --- /dev/null +++ b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart @@ -0,0 +1,125 @@ +// Copyright 2023 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +import 'dart:convert'; +import 'dart:io'; + +import 'package:collection/collection.dart'; +import 'package:web_benchmarks/server.dart'; + +import 'utils.dart'; + +/// Compares two sets of web benchmarks and calculates the delta between each +/// matching metric. +void main(List args) { + if (args.length != 2) { + throw Exception( + 'Expected 2 arguments (, ), but instead there ' + 'were ${args.length}.', + ); + } + + final baselineSource = args[0]; + final testSource = args[1]; + + stdout + ..writeln('Comparing the following benchmark results:') + ..writeln(' "$testSource" (test)') + ..writeln(' "$baselineSource" (baseline)'); + + final baselineFile = checkFileExists(baselineSource); + final testFile = checkFileExists(testSource); + if (baselineFile == null || testFile == null) { + if (baselineFile == null) { + throw Exception('Cannot find baseline file $baselineSource'); + } + if (testFile == null) { + throw Exception('Cannot find test file $testSource'); + } + } + + final baselineResults = + BenchmarkResults.parse(jsonDecode(baselineFile.readAsStringSync())); + final testResults = + BenchmarkResults.parse(jsonDecode(testFile.readAsStringSync())); + compareBenchmarks( + baselineResults, + testResults, + baselineSource: baselineSource, + ); +} + +Map>> compareBenchmarks( + BenchmarkResults baseline, + BenchmarkResults test, { + required String baselineSource, +}) { + stdout.writeln('Starting baseline comparison...'); + + for (final benchmarkName in test.scores.keys) { + stdout.writeln('Comparing metrics for benchmark 
"$benchmarkName".'); + + // Lookup this benchmark in the baseline. + final baselineScores = baseline.scores[benchmarkName]; + if (baselineScores == null) { + stdout.writeln( + 'Baseline does not contain results for benchmark "$benchmarkName".', + ); + continue; + } + + final testScores = test.scores[benchmarkName]!; + + for (final score in testScores) { + // Lookup this metric in the baseline. + final baselineScore = + baselineScores.firstWhereOrNull((s) => s.metric == score.metric); + if (baselineScore == null) { + stdout.writeln( + 'Baseline does not contain metric "${score.metric}" for ' + 'benchmark "$benchmarkName".', + ); + continue; + } + + // Add the delta to the [testMetric]. + _benchmarkDeltas[score] = (score.value - baselineScore.value).toDouble(); + } + } + stdout.writeln('Baseline comparison finished.'); + + final comparisonAsMap = test.toJsonWithDeltas(); + stdout + ..writeln('==== Comparison with baseline $baselineSource ====') + ..writeln(const JsonEncoder.withIndent(' ').convert(comparisonAsMap)) + ..writeln('==== End of baseline comparison ===='); + return comparisonAsMap; +} + +Expando _benchmarkDeltas = Expando(); + +extension ScoreDeltaExtension on BenchmarkScore { + double? 
get deltaFromBaseline => _benchmarkDeltas[this]; +} + +extension ResultDeltaExtension on BenchmarkResults { + Map>> toJsonWithDeltas() { + return scores.map>>( + (String benchmarkName, List scores) { + return MapEntry>>( + benchmarkName, + scores.map>( + (BenchmarkScore score) { + final delta = _benchmarkDeltas[score]; + return { + ...score.toJson(), + if (delta != null) 'delta': delta, + }; + }, + ).toList(), + ); + }, + ); + } +} diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart index 5dfdab1b003..eb052867a9f 100644 --- a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart +++ b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart @@ -10,6 +10,8 @@ import 'package:web_benchmarks/server.dart'; import '../test_infra/common.dart'; import '../test_infra/project_root_directory.dart'; +import 'compare_benchmarks.dart'; +import 'utils.dart'; /// Runs the DevTools web benchmarks and reports the benchmark data. 
/// @@ -36,11 +38,36 @@ Future main(List args) async { final resultsAsJsonString = const JsonEncoder.withIndent(' ').convert(resultsAsMap); + if (benchmarkArgs.saveToFileLocation != null) { + final location = Uri.parse(benchmarkArgs.saveToFileLocation!); + File.fromUri(location) + ..createSync() + ..writeAsStringSync(resultsAsJsonString); + } + stdout ..writeln('==== Results ====') ..writeln(resultsAsJsonString) ..writeln('==== End of results ====') ..writeln(); + + final baselineSource = benchmarkArgs.baselineLocation; + if (baselineSource != null) { + final baselineFile = checkFileExists(baselineSource); + if (baselineFile != null) { + final baselineResults = BenchmarkResults.parse( + jsonDecode(baselineFile.readAsStringSync()), + ); + final testResults = BenchmarkResults.parse( + jsonDecode(resultsAsJsonString), + ); + compareBenchmarks( + baselineResults, + testResults, + baselineSource: baselineSource, + ); + } + } } class BenchmarkArgs { @@ -57,10 +84,18 @@ class BenchmarkArgs { bool get useWasm => argResults[_wasmFlag]; + String? get saveToFileLocation => argResults[_saveToFileOption]; + + String? get baselineLocation => argResults[_baselineOption]; + static const _browserFlag = 'browser'; static const _wasmFlag = 'wasm'; + static const _saveToFileOption = 'save-to-file'; + + static const _baselineOption = 'baseline'; + /// Builds an arg parser for DevTools benchmarks. static ArgParser _buildArgParser() { return ArgParser() @@ -71,6 +106,18 @@ class BenchmarkArgs { ..addFlag( _wasmFlag, help: 'Runs the benchmark tests with dart2wasm', + ) + ..addOption( + _saveToFileOption, + help: 'Saves the benchmark results to a JSON file at the given path.', + valueHelp: '/Users/me/Downloads/output.json', + ) + ..addOption( + _baselineOption, + help: 'The baseline benchmark data to compare this test run to. 
The ' + 'baseline file should be created by running this script with the ' + '$_saveToFileOption in a separate test run.', + valueHelp: '/Users/me/Downloads/baseline.json', ); } } diff --git a/packages/devtools_app/benchmark/scripts/utils.dart b/packages/devtools_app/benchmark/scripts/utils.dart new file mode 100644 index 00000000000..a4cbc99bead --- /dev/null +++ b/packages/devtools_app/benchmark/scripts/utils.dart @@ -0,0 +1,14 @@ +// Copyright 2023 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +import 'dart:io'; + +File? checkFileExists(String path) { + final testFile = File.fromUri(Uri.parse(path)); + if (!testFile.existsSync()) { + stdout.writeln('Could not locate file at $path.'); + return null; + } + return testFile; +} diff --git a/packages/devtools_app/pubspec.yaml b/packages/devtools_app/pubspec.yaml index 1b839ad86a1..0b7b31afbf6 100644 --- a/packages/devtools_app/pubspec.yaml +++ b/packages/devtools_app/pubspec.yaml @@ -79,7 +79,7 @@ dev_dependencies: mockito: ^5.4.1 stager: ^1.0.1 test: ^1.21.1 - web_benchmarks: ^1.0.0 + web_benchmarks: ^1.0.1 webkit_inspection_protocol: ">=0.5.0 <2.0.0" flutter: