diff --git a/.github/actions/c-chain-reexecution-benchmark/action.yml b/.github/actions/c-chain-reexecution-benchmark/action.yml
index 6217ba3824d3..c5e5b1b80d96 100644
--- a/.github/actions/c-chain-reexecution-benchmark/action.yml
+++ b/.github/actions/c-chain-reexecution-benchmark/action.yml
@@ -147,7 +147,7 @@ runs:
       run: |
         TIMESTAMP=$(date '+%Y%m%d-%H%M%S')
         echo "EXECUTION_DATA_DIR=/tmp/reexecution-data-${TIMESTAMP}" >> "$GITHUB_ENV"
-        echo "BENCHMARK_OUTPUT_FILE=${GITHUB_WORKSPACE}/benchmark-output.txt" >> "$GITHUB_ENV"
+        echo "BENCHMARK_OUTPUT_FILE=${GITHUB_WORKSPACE}/benchmark-output.json" >> "$GITHUB_ENV"
     - name: Run C-Chain Re-execution Benchmark
       shell: nix develop --impure --command bash -x {0}
       run: |
@@ -183,7 +183,7 @@
     - name: Compare Benchmark Results
       uses: benchmark-action/github-action-benchmark@v1
       with:
-        tool: 'go'
+        tool: 'customBiggerIsBetter'
         output-file-path: ${{ env.BENCHMARK_OUTPUT_FILE }}
         summary-always: true
         github-token: ${{ inputs.github-token }}
diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh
index 896e68f8a559..aa8777e4d473 100755
--- a/scripts/benchmark_cchain_range.sh
+++ b/scripts/benchmark_cchain_range.sh
@@ -19,20 +19,15 @@ set -euo pipefail
 : "${START_BLOCK:?START_BLOCK must be set}"
 : "${END_BLOCK:?END_BLOCK must be set}"
 
-cmd="go test -timeout=0 -v -benchtime=1x -bench=BenchmarkReexecuteRange -run=^$ github.com/ava-labs/avalanchego/tests/reexecute/c \
-  --block-dir=\"${BLOCK_DIR}\" \
-  --current-state-dir=\"${CURRENT_STATE_DIR}\" \
-  ${RUNNER_NAME:+--runner=\"${RUNNER_NAME}\"} \
-  ${CONFIG:+--config=\"${CONFIG}\"} \
-  --start-block=\"${START_BLOCK}\" \
-  --end-block=\"${END_BLOCK}\" \
-  ${LABELS:+--labels=\"${LABELS}\"} \
-  ${METRICS_SERVER_ENABLED:+--metrics-server-enabled=\"${METRICS_SERVER_ENABLED}\"} \
-  ${METRICS_SERVER_PORT:+--metrics-server-port=\"${METRICS_SERVER_PORT}\"} \
-  ${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled=\"${METRICS_COLLECTOR_ENABLED}\"}"
-
-if [ -n "${BENCHMARK_OUTPUT_FILE:-}" ]; then
-  eval "$cmd" | tee "${BENCHMARK_OUTPUT_FILE}"
-else
-  eval "$cmd"
-fi
+go run github.com/ava-labs/avalanchego/tests/reexecute/c \
+  --block-dir="${BLOCK_DIR}" \
+  --current-state-dir="${CURRENT_STATE_DIR}" \
+  ${RUNNER_NAME:+--runner="${RUNNER_NAME}"} \
+  ${CONFIG:+--config="${CONFIG}"} \
+  --start-block="${START_BLOCK}" \
+  --end-block="${END_BLOCK}" \
+  ${LABELS:+--labels="${LABELS}"} \
+  ${BENCHMARK_OUTPUT_FILE:+--benchmark-output-file="${BENCHMARK_OUTPUT_FILE}"} \
+  ${METRICS_SERVER_ENABLED:+--metrics-server-enabled="${METRICS_SERVER_ENABLED}"} \
+  ${METRICS_SERVER_PORT:+--metrics-server-port="${METRICS_SERVER_PORT}"} \
+  ${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled="${METRICS_COLLECTOR_ENABLED}"}
diff --git a/tests/reexecute/blockexport/main.go b/tests/reexecute/blockexport/main.go
index 7c070fd5f185..7f7c7275bd65 100644
--- a/tests/reexecute/blockexport/main.go
+++ b/tests/reexecute/blockexport/main.go
@@ -45,7 +45,6 @@ func main() {
 		startBlockArg,
 		endBlockArg,
 		chanSize,
-		tc.DeferCleanup,
 	)
 	r.NoError(err)
 
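Reviewer note on the benchmark_cchain_range.sh change above: the benchmark now runs as a standalone binary (a `go run` of a main package) instead of through `go test -bench`, so flag parsing, timing, and metric reporting move from the testing framework into the program itself. A minimal sketch of that migration pattern, using entirely hypothetical names rather than the PR's code:

// Hypothetical sketch of moving a go-test benchmark into a standalone binary.
// None of these names are avalanchego APIs.
package main

import (
	"flag"
	"fmt"
	"time"
)

var iterationsArg int

func init() {
	// Flags replace `go test ... -args`; parse them before main runs the workload.
	flag.IntVar(&iterationsArg, "iterations", 1_000_000, "Number of iterations to execute.")
	flag.Parse()
}

func main() {
	// Where testing.B supplied b.N and b.ReportMetric, a standalone binary
	// times the work itself and reports results in a format of its choosing.
	start := time.Now()
	sum := 0
	for i := 0; i < iterationsArg; i++ {
		sum += i
	}
	elapsed := time.Since(start)

	fmt.Printf("iterations=%d checksum=%d elapsed=%s ops/s=%.0f\n",
		iterationsArg, sum, elapsed, float64(iterationsArg)/elapsed.Seconds())
}

The ${VAR:+--flag="${VAR}"} expansions in the script keep every option conditional: an unset or empty variable expands to nothing, so only explicitly configured flags reach the binary.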
diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute.go
similarity index 81%
rename from tests/reexecute/c/vm_reexecute_test.go
rename to tests/reexecute/c/vm_reexecute.go
index b5247d3239d7..586b4c6a2116 100644
--- a/tests/reexecute/c/vm_reexecute_test.go
+++ b/tests/reexecute/c/vm_reexecute.go
@@ -1,10 +1,11 @@
 // Copyright (C) 2019-2025, Ava Labs, Inc. All rights reserved.
 // See the file LICENSE for licensing terms.
 
-package vm
+package main
 
 import (
 	"context"
+	"encoding/json"
 	"flag"
 	"fmt"
 	"maps"
@@ -13,7 +14,6 @@ import (
 	"slices"
 	"strconv"
 	"strings"
-	"testing"
 	"time"
 
 	"github.com/google/uuid"
@@ -41,6 +41,7 @@ import (
 	"github.com/ava-labs/avalanchego/utils/constants"
 	"github.com/ava-labs/avalanchego/utils/crypto/bls/signer/localsigner"
 	"github.com/ava-labs/avalanchego/utils/logging"
+	"github.com/ava-labs/avalanchego/utils/perms"
 	"github.com/ava-labs/avalanchego/utils/timer"
 	"github.com/ava-labs/avalanchego/vms/metervm"
 	"github.com/ava-labs/avalanchego/vms/platformvm/warp"
@@ -97,9 +98,11 @@ var (
 	configNameArg   string
 	runnerNameArg   string
 	configBytesArg  []byte
+
+	benchmarkOutputFileArg string
 )
 
-func TestMain(m *testing.M) {
+func init() {
 	evm.RegisterAllLibEVMExtras()
 
 	flag.StringVar(&blockDirArg, "block-dir", blockDirArg, "Block DB directory to read from during re-execution.")
@@ -119,6 +122,8 @@ func TestMain(m *testing.M) {
 	flag.StringVar(&configNameArg, configKey, defaultConfigKey, fmt.Sprintf("Specifies the predefined config to use for the VM. Options include %s.", predefinedConfigOptionsStr))
 	flag.StringVar(&runnerNameArg, "runner", "dev", "Name of the runner executing this test. Added as a metric label and to the sub-benchmark's name to differentiate results on the runner key.")
 
+	flag.StringVar(&benchmarkOutputFileArg, "benchmark-output-file", benchmarkOutputFileArg, "Filepath where benchmark results will be written.")
+
 	flag.Parse()
 
 	if metricsCollectorEnabledArg {
@@ -143,30 +148,40 @@
 
 	// Set the runner name label on the metrics.
 	labels["runner"] = runnerNameArg
-
-	m.Run()
 }
 
-func BenchmarkReexecuteRange(b *testing.B) {
-	require.Equalf(b, 1, b.N, "BenchmarkReexecuteRange expects to run a single iteration because it overwrites the input current-state, but found (b.N=%d)", b.N)
-	b.Run(fmt.Sprintf("[%d,%d]-Config-%s-Runner-%s", startBlockArg, endBlockArg, configNameArg, runnerNameArg), func(b *testing.B) {
-		benchmarkReexecuteRange(
-			b,
-			blockDirArg,
-			currentStateDirArg,
-			configBytesArg,
-			startBlockArg,
-			endBlockArg,
-			chanSizeArg,
-			metricsServerEnabledArg,
-			metricsServerPortArg,
-			metricsCollectorEnabledArg,
-		)
-	})
+func main() {
+	tc := tests.NewTestContext(tests.NewDefaultLogger("c-chain-reexecution"))
+	tc.SetDefaultContextParent(context.Background())
+	defer tc.RecoverAndExit()
+
+	benchmarkName := fmt.Sprintf(
+		"BenchmarkReexecuteRange/[%d,%d]-Config-%s-Runner-%s",
+		startBlockArg,
+		endBlockArg,
+		configNameArg,
+		runnerNameArg,
+	)
+
+	benchmarkReexecuteRange(
+		tc,
+		benchmarkName,
+		blockDirArg,
+		currentStateDirArg,
+		configBytesArg,
+		startBlockArg,
+		endBlockArg,
+		chanSizeArg,
+		metricsServerEnabledArg,
+		metricsServerPortArg,
+		metricsCollectorEnabledArg,
+		benchmarkOutputFileArg,
+	)
 }
 
 func benchmarkReexecuteRange(
-	b *testing.B,
+	tc tests.TestContext,
+	benchmarkName string,
 	blockDir string,
 	currentStateDir string,
 	configBytes []byte,
@@ -176,9 +191,10 @@
 	metricsServerEnabled bool,
 	metricsPort uint64,
 	metricsCollectorEnabled bool,
+	benchmarkOutputFile string,
 ) {
-	r := require.New(b)
-	ctx := b.Context()
+	r := require.New(tc)
+	ctx := tc.GetDefaultContextParent()
 
 	// Create the prefix gatherer passed to the VM and register it with the top-level,
 	// labeled gatherer.
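The swap from require.New(b) to require.New(tc) works because testify's require.New accepts the two-method require.TestingT interface (Errorf and FailNow), not a concrete *testing.T. A simplified stand-in showing how a non-test type can satisfy that interface and emulate the cleanup/recover flow; this sketches the presumed mechanism only, not the real tests.TestContext implementation:

// Simplified stand-in for the tests.TestContext pattern: a type that
// satisfies require.TestingT outside of `go test` by panicking on failure,
// so a top-level recover (cf. tc.RecoverAndExit) can flush cleanups and
// set the exit code. Not the real avalanchego implementation.
package main

import (
	"fmt"
	"os"

	"github.com/stretchr/testify/require"
)

type testContext struct {
	cleanups []func()
}

// Errorf and FailNow are the two methods require.TestingT demands.
func (tc *testContext) Errorf(format string, args ...any) {
	fmt.Fprintf(os.Stderr, format+"\n", args...)
}

func (tc *testContext) FailNow() {
	panic("test context failed")
}

// deferCleanup mirrors the role of tc.DeferCleanup in the diff.
func (tc *testContext) deferCleanup(f func()) {
	tc.cleanups = append(tc.cleanups, f)
}

// recoverAndExit mirrors tc.RecoverAndExit: run cleanups in LIFO order
// and convert a failure panic into a non-zero exit code.
func (tc *testContext) recoverAndExit() {
	code := 0
	if r := recover(); r != nil {
		code = 1
	}
	for i := len(tc.cleanups) - 1; i >= 0; i-- {
		tc.cleanups[i]()
	}
	os.Exit(code)
}

func main() {
	tc := &testContext{}
	defer tc.recoverAndExit()

	tc.deferCleanup(func() { fmt.Println("cleanup ran") })

	r := require.New(tc) // compiles because *testContext implements require.TestingT
	r.Equal(2, 1+1)
}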
@@ -195,13 +211,11 @@ func benchmarkReexecuteRange(
 	consensusRegistry := prometheus.NewRegistry()
 	r.NoError(prefixGatherer.Register("avalanche_snowman", consensusRegistry))
 
-	log := tests.NewDefaultLogger("c-chain-reexecution")
-
 	if metricsServerEnabled {
-		serverAddr := startServer(b, log, prefixGatherer, metricsPort)
+		serverAddr := startServer(tc, prefixGatherer, metricsPort)
 
 		if metricsCollectorEnabled {
-			startCollector(b, log, "c-chain-reexecution", labels, serverAddr)
+			startCollector(tc, "c-chain-reexecution", labels, serverAddr)
 		}
 	}
 
@@ -210,6 +224,7 @@
 		chainDataDir = filepath.Join(currentStateDir, "chain-data-dir")
 	)
 
+	log := tc.Log()
 	log.Info("re-executing block range with params",
 		zap.String("runner", runnerNameArg),
 		zap.String("config", configNameArg),
@@ -225,7 +240,7 @@
 		zap.Int("chan-size", chanSize),
 	)
 
-	blockChan, err := reexecute.CreateBlockChanFromLevelDB(b, blockDir, startBlock, endBlock, chanSize, b.Cleanup)
+	blockChan, err := reexecute.CreateBlockChanFromLevelDB(tc, blockDir, startBlock, endBlock, chanSize)
 	r.NoError(err)
 
 	dbLogger := tests.NewDefaultLogger("db")
@@ -265,8 +280,13 @@
 	r.NoError(executor.executeSequence(ctx, blockChan))
 	elapsed := time.Since(start)
 
-	b.ReportMetric(0, "ns/op")                     // Set default ns/op to 0 to hide from the output
-	getTopLevelMetrics(b, prefixGatherer, elapsed) // Report the desired top-level metrics
+	benchmarkTool := newBenchmarkTool(benchmarkName)
+	getTopLevelMetrics(tc, benchmarkTool, prefixGatherer, elapsed) // Report the desired top-level metrics
+
+	benchmarkTool.logResults(log)
+	if len(benchmarkOutputFile) != 0 {
+		r.NoError(benchmarkTool.saveToFile(benchmarkOutputFile))
+	}
 }
 
 func newMainnetCChainVM(
@@ -496,21 +516,20 @@ func newConsensusMetrics(registry prometheus.Registerer) (*consensusMetrics, err
 
 // startServer starts a Prometheus server for the provided gatherer and returns
 // the server address.
 func startServer(
-	tb testing.TB,
-	log logging.Logger,
+	tc tests.TestContext,
 	gatherer prometheus.Gatherer,
 	port uint64,
 ) string {
-	r := require.New(tb)
+	r := require.New(tc)
 
 	server, err := tests.NewPrometheusServerWithPort(gatherer, port)
 	r.NoError(err)
 
-	log.Info("metrics endpoint available",
+	tc.Log().Info("metrics endpoint available",
 		zap.String("url", fmt.Sprintf("http://%s/ext/metrics", server.Address())),
 	)
 
-	tb.Cleanup(func() {
+	tc.DeferCleanup(func() {
 		r.NoError(server.Stop())
 	})
 
@@ -520,17 +539,14 @@ func startServer(
 // startCollector starts a Prometheus collector configured to scrape the server
 // listening on serverAddr. startCollector also attaches the provided labels +
 // Github labels if available to the collected metrics.
-func startCollector(tb testing.TB, log logging.Logger, name string, labels map[string]string, serverAddr string) {
-	r := require.New(tb)
-
-	startPromCtx, cancel := context.WithTimeout(tb.Context(), tests.DefaultTimeout)
-	defer cancel()
+func startCollector(tc tests.TestContext, name string, labels map[string]string, serverAddr string) {
+	r := require.New(tc)
 
 	logger := tests.NewDefaultLogger("prometheus")
-	r.NoError(tmpnet.StartPrometheus(startPromCtx, logger))
+	r.NoError(tmpnet.StartPrometheus(tc.DefaultContext(), logger))
 
 	var sdConfigFilePath string
-	tb.Cleanup(func() {
+	tc.DeferCleanup(func() {
 		// Ensure a final metrics scrape.
 		// This default delay is set above the default scrape interval used by StartPrometheus.
 		time.Sleep(tmpnet.NetworkShutdownDelay)
@@ -543,10 +559,7 @@ func startCollector(tb testing.TB, log logging.Logger, name string, labels map[s
 		}(),
 	)
 
-	//nolint:usetesting // t.Context() is already canceled inside the cleanup function
-	checkMetricsCtx, cancel := context.WithTimeout(context.Background(), tests.DefaultTimeout)
-	defer cancel()
-	r.NoError(tmpnet.CheckMetricsExist(checkMetricsCtx, logger, networkUUID))
+	r.NoError(tmpnet.CheckMetricsExist(tc.DefaultContext(), logger, networkUUID))
 	})
 
 	sdConfigFilePath, err := tmpnet.WritePrometheusSDConfig(name, tmpnet.SDConfig{
@@ -561,7 +574,7 @@
 		startTime = strconv.FormatInt(time.Now().UnixMilli(), 10)
 	)
-	log.Info("metrics available via grafana",
+	tc.Log().Info("metrics available via grafana",
 		zap.String(
 			"url",
 			tmpnet.NewGrafanaURI(networkUUID, startTime, "", grafanaURI),
 		),
@@ -569,6 +582,62 @@
 	)
 }
 
+// benchmarkResult represents a single benchmark measurement.
+type benchmarkResult struct {
+	Name  string `json:"name"`
+	Value string `json:"value"`
+	Unit  string `json:"unit"`
+}
+
+// benchmarkTool collects and manages benchmark results for a named benchmark.
+// It allows adding multiple results and saving them to a file in JSON format.
+type benchmarkTool struct {
+	name    string
+	results []benchmarkResult
+}
+
+// newBenchmarkTool creates a new benchmarkTool instance with the given name.
+// The name is used as the base name for all results collected by this tool.
+// When results are added, the unit is appended to this base name.
+func newBenchmarkTool(name string) *benchmarkTool {
+	return &benchmarkTool{
+		name:    name,
+		results: make([]benchmarkResult, 0),
+	}
+}
+
+// addResult adds a new benchmark result with the given value and unit.
+// The result name is constructed by appending the unit to the benchmark name.
+// Calling `addResult` is analogous to calling `b.ReportMetric()`.
+func (b *benchmarkTool) addResult(value float64, unit string) {
+	result := benchmarkResult{
+		Name:  fmt.Sprintf("%s - %s", b.name, unit),
+		Value: strconv.FormatFloat(value, 'f', -1, 64),
+		Unit:  unit,
+	}
+	b.results = append(b.results, result)
+}
+
+// saveToFile writes all collected benchmark results to a JSON file at the
+// specified path. The output is formatted with indentation for readability.
+// Returns an error if marshaling or file writing fails.
+func (b *benchmarkTool) saveToFile(path string) error {
+	output, err := json.MarshalIndent(b.results, "", "  ")
+	if err != nil {
+		return err
+	}
+
+	return os.WriteFile(path, output, perms.ReadWrite)
+}
+
+// logResults logs all collected benchmark results using the provided logger.
+func (b *benchmarkTool) logResults(log logging.Logger) {
+	for _, r := range b.results {
+		result := fmt.Sprintf("%s %s", r.Value, r.Unit)
+		log.Info(b.name, zap.String("result", result))
+	}
+}
+
 // parseCustomLabels parses a comma-separated list of key-value pairs into a map
 // of custom labels.
 func parseCustomLabels(labelsStr string) (map[string]string, error) {
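A usage sketch for the benchmarkTool added above, with the type and addResult inlined so the example compiles on its own; the block range and measurement below are invented values:

// Usage sketch for benchmarkTool; the benchmark name and measurement
// are invented for illustration.
package main

import (
	"encoding/json"
	"fmt"
	"strconv"
)

type benchmarkResult struct {
	Name  string `json:"name"`
	Value string `json:"value"`
	Unit  string `json:"unit"`
}

type benchmarkTool struct {
	name    string
	results []benchmarkResult
}

func (b *benchmarkTool) addResult(value float64, unit string) {
	b.results = append(b.results, benchmarkResult{
		Name:  fmt.Sprintf("%s - %s", b.name, unit),
		Value: strconv.FormatFloat(value, 'f', -1, 64),
		Unit:  unit,
	})
}

func main() {
	tool := &benchmarkTool{name: "BenchmarkReexecuteRange/[1,100]-Config-default-Runner-dev"}
	tool.addResult(123.45, "mgas/s")

	out, err := json.MarshalIndent(tool.results, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
	// Output:
	// [
	//   {
	//     "name": "BenchmarkReexecuteRange/[1,100]-Config-default-Runner-dev - mgas/s",
	//     "value": "123.45",
	//     "unit": "mgas/s"
	//   }
	// ]
}

This JSON array is what saveToFile writes to --benchmark-output-file, i.e. the benchmark-output.json that the Compare Benchmark Results step feeds to github-action-benchmark with tool: 'customBiggerIsBetter'.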
@@ -586,13 +655,14 @@
 	return labels, nil
 }
 
-func getTopLevelMetrics(b *testing.B, registry prometheus.Gatherer, elapsed time.Duration) {
-	r := require.New(b)
+func getTopLevelMetrics(tc tests.TestContext, tool *benchmarkTool, registry prometheus.Gatherer, elapsed time.Duration) {
+	r := require.New(tc)
 
 	gasUsed, err := getCounterMetricValue(registry, "avalanche_evm_eth_chain_block_gas_used_processed")
 	r.NoError(err)
 	mgasPerSecond := gasUsed / 1_000_000 / elapsed.Seconds()
-	b.ReportMetric(mgasPerSecond, "mgas/s")
+
+	tool.addResult(mgasPerSecond, "mgas/s")
 }
 
 func getCounterMetricValue(registry prometheus.Gatherer, query string) (float64, error) {
diff --git a/tests/reexecute/db.go b/tests/reexecute/db.go
index 89ce315c2f2c..609eb9a9ce3c 100644
--- a/tests/reexecute/db.go
+++ b/tests/reexecute/db.go
@@ -12,6 +12,7 @@ import (
 
 	"github.com/ava-labs/avalanchego/database"
 	"github.com/ava-labs/avalanchego/database/leveldb"
+	"github.com/ava-labs/avalanchego/tests"
 	"github.com/ava-labs/avalanchego/utils/logging"
 )
 
@@ -27,19 +28,16 @@ type BlockResult struct {
 // It opens the database at sourceDir and iterates through blocks from startBlock to endBlock (inclusive).
 // Blocks are read sequentially and sent to the returned channel as BlockResult values.
 //
-// cleanup is a function that registers cleanup callbacks and is used to ensure
-// that the database being read from is properly closed prior to the test terminating.
-//
 // Any validation errors or iteration errors are sent as BlockResult with Err set, then the channel is closed.
-func CreateBlockChanFromLevelDB(t require.TestingT, sourceDir string, startBlock, endBlock uint64, chanSize int, cleanup func(func())) (<-chan BlockResult, error) {
-	r := require.New(t)
+func CreateBlockChanFromLevelDB(tc tests.TestContext, sourceDir string, startBlock, endBlock uint64, chanSize int) (<-chan BlockResult, error) {
+	r := require.New(tc)
 	ch := make(chan BlockResult, chanSize)
 
 	db, err := leveldb.New(sourceDir, nil, logging.NoLog{}, prometheus.NewRegistry())
 	if err != nil {
 		return nil, fmt.Errorf("failed to create leveldb database from %q: %w", sourceDir, err)
 	}
-	cleanup(func() {
+	tc.DeferCleanup(func() {
 		r.NoError(db.Close())
 	})
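Finally, on the db.go change: dropping the cleanup parameter does not alter CreateBlockChanFromLevelDB's producer pattern, in which errors travel in-band on the same channel as results and closing the channel signals the end of iteration. A generic, self-contained sketch of that pattern with stand-in types (not the leveldb-backed implementation):

// Generic sketch of the error-in-band producer pattern that
// CreateBlockChanFromLevelDB uses: results and errors share one channel,
// and closing the channel signals the end of iteration. Stand-in types,
// not the leveldb-backed implementation.
package main

import (
	"errors"
	"fmt"
)

type blockResult struct {
	Height uint64
	Bytes  []byte
	Err    error // set on the final message if iteration failed
}

func createBlockChan(start, end uint64, chanSize int) <-chan blockResult {
	ch := make(chan blockResult, chanSize) // buffered so the reader can lag
	go func() {
		defer close(ch) // consumers range until the channel closes
		for h := start; h <= end; h++ {
			if h == 4 { // simulate a read failure mid-iteration
				ch <- blockResult{Err: errors.New("missing block 4")}
				return
			}
			ch <- blockResult{Height: h, Bytes: []byte{byte(h)}}
		}
	}()
	return ch
}

func main() {
	for res := range createBlockChan(1, 10, 2) {
		if res.Err != nil {
			fmt.Println("iteration stopped:", res.Err)
			return
		}
		fmt.Println("got block", res.Height)
	}
}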