diff --git a/.github/actions/c-chain-reexecution-benchmark/action.yml b/.github/actions/c-chain-reexecution-benchmark/action.yml
index 6217ba3824d3..c5e5b1b80d96 100644
--- a/.github/actions/c-chain-reexecution-benchmark/action.yml
+++ b/.github/actions/c-chain-reexecution-benchmark/action.yml
@@ -147,7 +147,7 @@ runs:
       run: |
         TIMESTAMP=$(date '+%Y%m%d-%H%M%S')
         echo "EXECUTION_DATA_DIR=/tmp/reexecution-data-${TIMESTAMP}" >> "$GITHUB_ENV"
-        echo "BENCHMARK_OUTPUT_FILE=${GITHUB_WORKSPACE}/benchmark-output.txt" >> "$GITHUB_ENV"
+        echo "BENCHMARK_OUTPUT_FILE=${GITHUB_WORKSPACE}/benchmark-output.json" >> "$GITHUB_ENV"
     - name: Run C-Chain Re-execution Benchmark
       shell: nix develop --impure --command bash -x {0}
       run: |
@@ -183,7 +183,7 @@
     - name: Compare Benchmark Results
       uses: benchmark-action/github-action-benchmark@v1
       with:
-        tool: 'go'
+        tool: 'customBiggerIsBetter'
         output-file-path: ${{ env.BENCHMARK_OUTPUT_FILE }}
         summary-always: true
         github-token: ${{ inputs.github-token }}
diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh
index 896e68f8a559..aa8777e4d473 100755
--- a/scripts/benchmark_cchain_range.sh
+++ b/scripts/benchmark_cchain_range.sh
@@ -19,20 +19,15 @@ set -euo pipefail
 : "${START_BLOCK:?START_BLOCK must be set}"
 : "${END_BLOCK:?END_BLOCK must be set}"
 
-cmd="go test -timeout=0 -v -benchtime=1x -bench=BenchmarkReexecuteRange -run=^$ github.com/ava-labs/avalanchego/tests/reexecute/c \
-  --block-dir=\"${BLOCK_DIR}\" \
-  --current-state-dir=\"${CURRENT_STATE_DIR}\" \
-  ${RUNNER_NAME:+--runner=\"${RUNNER_NAME}\"} \
-  ${CONFIG:+--config=\"${CONFIG}\"} \
-  --start-block=\"${START_BLOCK}\" \
-  --end-block=\"${END_BLOCK}\" \
-  ${LABELS:+--labels=\"${LABELS}\"} \
-  ${METRICS_SERVER_ENABLED:+--metrics-server-enabled=\"${METRICS_SERVER_ENABLED}\"} \
-  ${METRICS_SERVER_PORT:+--metrics-server-port=\"${METRICS_SERVER_PORT}\"} \
-  ${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled=\"${METRICS_COLLECTOR_ENABLED}\"}"
-
-if [ -n "${BENCHMARK_OUTPUT_FILE:-}" ]; then
-  eval "$cmd" | tee "${BENCHMARK_OUTPUT_FILE}"
-else
-  eval "$cmd"
-fi
+go run github.com/ava-labs/avalanchego/tests/reexecute/c \
+  --block-dir="${BLOCK_DIR}" \
+  --current-state-dir="${CURRENT_STATE_DIR}" \
+  ${RUNNER_NAME:+--runner="${RUNNER_NAME}"} \
+  ${CONFIG:+--config="${CONFIG}"} \
+  --start-block="${START_BLOCK}" \
+  --end-block="${END_BLOCK}" \
+  ${LABELS:+--labels="${LABELS}"} \
+  ${BENCHMARK_OUTPUT_FILE:+--benchmark-output-file="${BENCHMARK_OUTPUT_FILE}"} \
+  ${METRICS_SERVER_ENABLED:+--metrics-server-enabled="${METRICS_SERVER_ENABLED}"} \
+  ${METRICS_SERVER_PORT:+--metrics-server-port="${METRICS_SERVER_PORT}"} \
+  ${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled="${METRICS_COLLECTOR_ENABLED}"}
diff --git a/tests/reexecute/blockexport/main.go b/tests/reexecute/blockexport/main.go
index 7c070fd5f185..7f7c7275bd65 100644
--- a/tests/reexecute/blockexport/main.go
+++ b/tests/reexecute/blockexport/main.go
@@ -45,7 +45,6 @@ func main() {
 		startBlockArg,
 		endBlockArg,
 		chanSize,
-		tc.DeferCleanup,
 	)
 	r.NoError(err)
 
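Reviewer note on the benchmark_cchain_range.sh change above: the benchmark now runs as a standalone binary (a `go run` of a main package) instead of through `go test -bench`, so flag parsing, timing, and metric reporting move from the testing framework into the program itself. A minimal sketch of that migration pattern, using entirely hypothetical names rather than the PR's code:

// Hypothetical sketch of moving a go-test benchmark into a standalone binary.
// None of these names are avalanchego APIs.
package main

import (
	"flag"
	"fmt"
	"time"
)

var iterationsArg int

func init() {
	// Flags replace `go test ... -args`; parse them before main runs the workload.
	flag.IntVar(&iterationsArg, "iterations", 1_000_000, "Number of iterations to execute.")
	flag.Parse()
}

func main() {
	// Where testing.B supplied b.N and b.ReportMetric, a standalone binary
	// times the work itself and reports results in a format of its choosing.
	start := time.Now()
	sum := 0
	for i := 0; i < iterationsArg; i++ {
		sum += i
	}
	elapsed := time.Since(start)

	fmt.Printf("iterations=%d checksum=%d elapsed=%s ops/s=%.0f\n",
		iterationsArg, sum, elapsed, float64(iterationsArg)/elapsed.Seconds())
}

The ${VAR:+--flag="${VAR}"} expansions in the script keep every option conditional: an unset or empty variable expands to nothing, so only explicitly configured flags reach the binary.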
diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute.go
similarity index 81%
rename from tests/reexecute/c/vm_reexecute_test.go
rename to tests/reexecute/c/vm_reexecute.go
index b5247d3239d7..586b4c6a2116 100644
--- a/tests/reexecute/c/vm_reexecute_test.go
+++ b/tests/reexecute/c/vm_reexecute.go
@@ -1,10 +1,11 @@
 // Copyright (C) 2019-2025, Ava Labs, Inc. All rights reserved.
 // See the file LICENSE for licensing terms.
 
-package vm
+package main
 
 import (
 	"context"
+	"encoding/json"
 	"flag"
 	"fmt"
 	"maps"
@@ -13,7 +14,6 @@ import (
 	"slices"
 	"strconv"
 	"strings"
-	"testing"
 	"time"
 
 	"github.com/google/uuid"
@@ -41,6 +41,7 @@ import (
 	"github.com/ava-labs/avalanchego/utils/constants"
 	"github.com/ava-labs/avalanchego/utils/crypto/bls/signer/localsigner"
 	"github.com/ava-labs/avalanchego/utils/logging"
+	"github.com/ava-labs/avalanchego/utils/perms"
 	"github.com/ava-labs/avalanchego/utils/timer"
 	"github.com/ava-labs/avalanchego/vms/metervm"
 	"github.com/ava-labs/avalanchego/vms/platformvm/warp"
@@ -97,9 +98,11 @@ var (
 	configNameArg   string
 	runnerNameArg   string
 	configBytesArg  []byte
+
+	benchmarkOutputFileArg string
 )
 
-func TestMain(m *testing.M) {
+func init() {
 	evm.RegisterAllLibEVMExtras()
 
 	flag.StringVar(&blockDirArg, "block-dir", blockDirArg, "Block DB directory to read from during re-execution.")
@@ -119,6 +122,8 @@ func TestMain(m *testing.M) {
 	flag.StringVar(&configNameArg, configKey, defaultConfigKey, fmt.Sprintf("Specifies the predefined config to use for the VM. Options include %s.", predefinedConfigOptionsStr))
 	flag.StringVar(&runnerNameArg, "runner", "dev", "Name of the runner executing this test. Added as a metric label and to the sub-benchmark's name to differentiate results on the runner key.")
 
+	flag.StringVar(&benchmarkOutputFileArg, "benchmark-output-file", benchmarkOutputFileArg, "Filepath where benchmark results will be written.")
+
 	flag.Parse()
 
 	if metricsCollectorEnabledArg {
@@ -143,30 +148,40 @@
 
 	// Set the runner name label on the metrics.
 	labels["runner"] = runnerNameArg
-
-	m.Run()
 }
 
-func BenchmarkReexecuteRange(b *testing.B) {
-	require.Equalf(b, 1, b.N, "BenchmarkReexecuteRange expects to run a single iteration because it overwrites the input current-state, but found (b.N=%d)", b.N)
-	b.Run(fmt.Sprintf("[%d,%d]-Config-%s-Runner-%s", startBlockArg, endBlockArg, configNameArg, runnerNameArg), func(b *testing.B) {
-		benchmarkReexecuteRange(
-			b,
-			blockDirArg,
-			currentStateDirArg,
-			configBytesArg,
-			startBlockArg,
-			endBlockArg,
-			chanSizeArg,
-			metricsServerEnabledArg,
-			metricsServerPortArg,
-			metricsCollectorEnabledArg,
-		)
-	})
+func main() {
+	tc := tests.NewTestContext(tests.NewDefaultLogger("c-chain-reexecution"))
+	tc.SetDefaultContextParent(context.Background())
+	defer tc.RecoverAndExit()
+
+	benchmarkName := fmt.Sprintf(
+		"BenchmarkReexecuteRange/[%d,%d]-Config-%s-Runner-%s",
+		startBlockArg,
+		endBlockArg,
+		configNameArg,
+		runnerNameArg,
+	)
+
+	benchmarkReexecuteRange(
+		tc,
+		benchmarkName,
+		blockDirArg,
+		currentStateDirArg,
+		configBytesArg,
+		startBlockArg,
+		endBlockArg,
+		chanSizeArg,
+		metricsServerEnabledArg,
+		metricsServerPortArg,
+		metricsCollectorEnabledArg,
+		benchmarkOutputFileArg,
+	)
 }
 
 func benchmarkReexecuteRange(
-	b *testing.B,
+	tc tests.TestContext,
+	benchmarkName string,
 	blockDir string,
 	currentStateDir string,
 	configBytes []byte,
@@ -176,9 +191,10 @@
 	metricsServerEnabled bool,
 	metricsPort uint64,
 	metricsCollectorEnabled bool,
+	benchmarkOutputFile string,
 ) {
-	r := require.New(b)
-	ctx := b.Context()
+	r := require.New(tc)
+	ctx := tc.GetDefaultContextParent()
 
 	// Create the prefix gatherer passed to the VM and register it with the top-level,
 	// labeled gatherer.
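The swap from require.New(b) to require.New(tc) works because testify's require.New accepts the two-method require.TestingT interface (Errorf and FailNow), not a concrete *testing.T. A simplified stand-in showing how a non-test type can satisfy that interface and emulate the cleanup/recover flow; this sketches the presumed mechanism only, not the real tests.TestContext implementation:

// Simplified stand-in for the tests.TestContext pattern: a type that
// satisfies require.TestingT outside of `go test` by panicking on failure,
// so a top-level recover (cf. tc.RecoverAndExit) can flush cleanups and
// set the exit code. Not the real avalanchego implementation.
package main

import (
	"fmt"
	"os"

	"github.com/stretchr/testify/require"
)

type testContext struct {
	cleanups []func()
}

// Errorf and FailNow are the two methods require.TestingT demands.
func (tc *testContext) Errorf(format string, args ...any) {
	fmt.Fprintf(os.Stderr, format+"\n", args...)
}

func (tc *testContext) FailNow() {
	panic("test context failed")
}

// deferCleanup mirrors the role of tc.DeferCleanup in the diff.
func (tc *testContext) deferCleanup(f func()) {
	tc.cleanups = append(tc.cleanups, f)
}

// recoverAndExit mirrors tc.RecoverAndExit: run cleanups in LIFO order
// and convert a failure panic into a non-zero exit code.
func (tc *testContext) recoverAndExit() {
	code := 0
	if r := recover(); r != nil {
		code = 1
	}
	for i := len(tc.cleanups) - 1; i >= 0; i-- {
		tc.cleanups[i]()
	}
	os.Exit(code)
}

func main() {
	tc := &testContext{}
	defer tc.recoverAndExit()

	tc.deferCleanup(func() { fmt.Println("cleanup ran") })

	r := require.New(tc) // compiles because *testContext implements require.TestingT
	r.Equal(2, 1+1)
}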
@@ -195,13 +211,11 @@ func benchmarkReexecuteRange(
 	consensusRegistry := prometheus.NewRegistry()
 	r.NoError(prefixGatherer.Register("avalanche_snowman", consensusRegistry))
 
-	log := tests.NewDefaultLogger("c-chain-reexecution")
-
 	if metricsServerEnabled {
-		serverAddr := startServer(b, log, prefixGatherer, metricsPort)
+		serverAddr := startServer(tc, prefixGatherer, metricsPort)
 
 		if metricsCollectorEnabled {
-			startCollector(b, log, "c-chain-reexecution", labels, serverAddr)
+			startCollector(tc, "c-chain-reexecution", labels, serverAddr)
 		}
 	}
 
@@ -210,6 +224,7 @@
 		chainDataDir = filepath.Join(currentStateDir, "chain-data-dir")
 	)
 
+	log := tc.Log()
 	log.Info("re-executing block range with params",
 		zap.String("runner", runnerNameArg),
 		zap.String("config", configNameArg),
@@ -225,7 +240,7 @@
 		zap.Int("chan-size", chanSize),
 	)
 
-	blockChan, err := reexecute.CreateBlockChanFromLevelDB(b, blockDir, startBlock, endBlock, chanSize, b.Cleanup)
+	blockChan, err := reexecute.CreateBlockChanFromLevelDB(tc, blockDir, startBlock, endBlock, chanSize)
 	r.NoError(err)
 
 	dbLogger := tests.NewDefaultLogger("db")
@@ -265,8 +280,13 @@
 	r.NoError(executor.executeSequence(ctx, blockChan))
 	elapsed := time.Since(start)
 
-	b.ReportMetric(0, "ns/op")                     // Set default ns/op to 0 to hide from the output
-	getTopLevelMetrics(b, prefixGatherer, elapsed) // Report the desired top-level metrics
+	benchmarkTool := newBenchmarkTool(benchmarkName)
+	getTopLevelMetrics(tc, benchmarkTool, prefixGatherer, elapsed) // Report the desired top-level metrics
+
+	benchmarkTool.logResults(log)
+	if len(benchmarkOutputFile) != 0 {
+		r.NoError(benchmarkTool.saveToFile(benchmarkOutputFile))
+	}
 }
 
 func newMainnetCChainVM(
@@ -496,21 +516,20 @@ func newConsensusMetrics(registry prometheus.Registerer) (*consensusMetrics, err
 
 // startServer starts a Prometheus server for the provided gatherer and returns
 // the server address.
 func startServer(
-	tb testing.TB,
-	log logging.Logger,
+	tc tests.TestContext,
 	gatherer prometheus.Gatherer,
 	port uint64,
 ) string {
-	r := require.New(tb)
+	r := require.New(tc)
 
 	server, err := tests.NewPrometheusServerWithPort(gatherer, port)
 	r.NoError(err)
 
-	log.Info("metrics endpoint available",
+	tc.Log().Info("metrics endpoint available",
 		zap.String("url", fmt.Sprintf("http://%s/ext/metrics", server.Address())),
 	)
 
-	tb.Cleanup(func() {
+	tc.DeferCleanup(func() {
 		r.NoError(server.Stop())
 	})
 
@@ -520,17 +539,14 @@ func startServer(
 // startCollector starts a Prometheus collector configured to scrape the server
 // listening on serverAddr. startCollector also attaches the provided labels +
 // Github labels if available to the collected metrics.
-func startCollector(tb testing.TB, log logging.Logger, name string, labels map[string]string, serverAddr string) {
-	r := require.New(tb)
-
-	startPromCtx, cancel := context.WithTimeout(tb.Context(), tests.DefaultTimeout)
-	defer cancel()
+func startCollector(tc tests.TestContext, name string, labels map[string]string, serverAddr string) {
+	r := require.New(tc)
 
 	logger := tests.NewDefaultLogger("prometheus")
-	r.NoError(tmpnet.StartPrometheus(startPromCtx, logger))
+	r.NoError(tmpnet.StartPrometheus(tc.DefaultContext(), logger))
 
 	var sdConfigFilePath string
-	tb.Cleanup(func() {
+	tc.DeferCleanup(func() {
 		// Ensure a final metrics scrape.
 		// This default delay is set above the default scrape interval used by StartPrometheus.
 		time.Sleep(tmpnet.NetworkShutdownDelay)
@@ -543,10 +559,7 @@ func startCollector(tb testing.TB, log logging.Logger, name string, labels map[s
 		}(),
 	)
 
-	//nolint:usetesting // t.Context() is already canceled inside the cleanup function
-	checkMetricsCtx, cancel := context.WithTimeout(context.Background(), tests.DefaultTimeout)
-	defer cancel()
-	r.NoError(tmpnet.CheckMetricsExist(checkMetricsCtx, logger, networkUUID))
+	r.NoError(tmpnet.CheckMetricsExist(tc.DefaultContext(), logger, networkUUID))
 	})
 
 	sdConfigFilePath, err := tmpnet.WritePrometheusSDConfig(name, tmpnet.SDConfig{
@@ -561,7 +574,7 @@
 		startTime = strconv.FormatInt(time.Now().UnixMilli(), 10)
 	)
-	log.Info("metrics available via grafana",
+	tc.Log().Info("metrics available via grafana",
 		zap.String(
 			"url",
 			tmpnet.NewGrafanaURI(networkUUID, startTime, "", grafanaURI),
 		),
@@ -569,6 +582,62 @@
 	)
 }
 
+// benchmarkResult represents a single benchmark measurement.
+type benchmarkResult struct {
+	Name  string `json:"name"`
+	Value string `json:"value"`
+	Unit  string `json:"unit"`
+}
+
+// benchmarkTool collects and manages benchmark results for a named benchmark.
+// It allows adding multiple results and saving them to a file in JSON format.
+type benchmarkTool struct {
+	name    string
+	results []benchmarkResult
+}
+
+// newBenchmarkTool creates a new benchmarkTool instance with the given name.
+// The name is used as the base name for all results collected by this tool.
+// When results are added, the unit is appended to this base name.
+func newBenchmarkTool(name string) *benchmarkTool {
+	return &benchmarkTool{
+		name:    name,
+		results: make([]benchmarkResult, 0),
+	}
+}
+
+// addResult adds a new benchmark result with the given value and unit.
+// The result name is constructed by appending the unit to the benchmark name.
+// Calling `addResult` is analogous to calling `b.ReportMetric()`.
+func (b *benchmarkTool) addResult(value float64, unit string) {
+	result := benchmarkResult{
+		Name:  fmt.Sprintf("%s - %s", b.name, unit),
+		Value: strconv.FormatFloat(value, 'f', -1, 64),
+		Unit:  unit,
+	}
+	b.results = append(b.results, result)
+}
+
+// saveToFile writes all collected benchmark results to a JSON file at the
+// specified path. The output is formatted with indentation for readability.
+// Returns an error if marshaling or file writing fails.
+func (b *benchmarkTool) saveToFile(path string) error {
+	output, err := json.MarshalIndent(b.results, "", "  ")
+	if err != nil {
+		return err
+	}
+
+	return os.WriteFile(path, output, perms.ReadWrite)
+}
+
+// logResults logs all collected benchmark results using the provided logger.
+func (b *benchmarkTool) logResults(log logging.Logger) {
+	for _, r := range b.results {
+		result := fmt.Sprintf("%s %s", r.Value, r.Unit)
+		log.Info(b.name, zap.String("result", result))
+	}
+}
+
 // parseCustomLabels parses a comma-separated list of key-value pairs into a map
 // of custom labels.
 func parseCustomLabels(labelsStr string) (map[string]string, error) {
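A usage sketch for the benchmarkTool added above, with the type and addResult inlined so the example compiles on its own; the block range and measurement below are invented values:

// Usage sketch for benchmarkTool; the benchmark name and measurement
// are invented for illustration.
package main

import (
	"encoding/json"
	"fmt"
	"strconv"
)

type benchmarkResult struct {
	Name  string `json:"name"`
	Value string `json:"value"`
	Unit  string `json:"unit"`
}

type benchmarkTool struct {
	name    string
	results []benchmarkResult
}

func (b *benchmarkTool) addResult(value float64, unit string) {
	b.results = append(b.results, benchmarkResult{
		Name:  fmt.Sprintf("%s - %s", b.name, unit),
		Value: strconv.FormatFloat(value, 'f', -1, 64),
		Unit:  unit,
	})
}

func main() {
	tool := &benchmarkTool{name: "BenchmarkReexecuteRange/[1,100]-Config-default-Runner-dev"}
	tool.addResult(123.45, "mgas/s")

	out, err := json.MarshalIndent(tool.results, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
	// Output:
	// [
	//   {
	//     "name": "BenchmarkReexecuteRange/[1,100]-Config-default-Runner-dev - mgas/s",
	//     "value": "123.45",
	//     "unit": "mgas/s"
	//   }
	// ]
}

This JSON array is what saveToFile writes to --benchmark-output-file, i.e. the benchmark-output.json that the Compare Benchmark Results step feeds to github-action-benchmark with tool: 'customBiggerIsBetter'.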
@@ -586,13 +655,14 @@
 	return labels, nil
 }
 
-func getTopLevelMetrics(b *testing.B, registry prometheus.Gatherer, elapsed time.Duration) {
-	r := require.New(b)
+func getTopLevelMetrics(tc tests.TestContext, tool *benchmarkTool, registry prometheus.Gatherer, elapsed time.Duration) {
+	r := require.New(tc)
 
 	gasUsed, err := getCounterMetricValue(registry, "avalanche_evm_eth_chain_block_gas_used_processed")
 	r.NoError(err)
 	mgasPerSecond := gasUsed / 1_000_000 / elapsed.Seconds()
-	b.ReportMetric(mgasPerSecond, "mgas/s")
+
+	tool.addResult(mgasPerSecond, "mgas/s")
 }
 
 func getCounterMetricValue(registry prometheus.Gatherer, query string) (float64, error) {
diff --git a/tests/reexecute/db.go b/tests/reexecute/db.go
index 89ce315c2f2c..609eb9a9ce3c 100644
--- a/tests/reexecute/db.go
+++ b/tests/reexecute/db.go
@@ -12,6 +12,7 @@ import (
 
 	"github.com/ava-labs/avalanchego/database"
 	"github.com/ava-labs/avalanchego/database/leveldb"
+	"github.com/ava-labs/avalanchego/tests"
 	"github.com/ava-labs/avalanchego/utils/logging"
 )
 
@@ -27,19 +28,16 @@ type BlockResult struct {
 // It opens the database at sourceDir and iterates through blocks from startBlock to endBlock (inclusive).
 // Blocks are read sequentially and sent to the returned channel as BlockResult values.
 //
-// cleanup is a function that registers cleanup callbacks and is used to ensure
-// that the database being read from is properly closed prior to the test terminating.
-//
 // Any validation errors or iteration errors are sent as BlockResult with Err set, then the channel is closed.
-func CreateBlockChanFromLevelDB(t require.TestingT, sourceDir string, startBlock, endBlock uint64, chanSize int, cleanup func(func())) (<-chan BlockResult, error) {
-	r := require.New(t)
+func CreateBlockChanFromLevelDB(tc tests.TestContext, sourceDir string, startBlock, endBlock uint64, chanSize int) (<-chan BlockResult, error) {
+	r := require.New(tc)
 	ch := make(chan BlockResult, chanSize)
 
 	db, err := leveldb.New(sourceDir, nil, logging.NoLog{}, prometheus.NewRegistry())
 	if err != nil {
 		return nil, fmt.Errorf("failed to create leveldb database from %q: %w", sourceDir, err)
 	}
-	cleanup(func() {
+	tc.DeferCleanup(func() {
 		r.NoError(db.Close())
 	})
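Finally, on the db.go change: dropping the cleanup parameter does not alter CreateBlockChanFromLevelDB's producer pattern, in which errors travel in-band on the same channel as results and closing the channel signals the end of iteration. A generic, self-contained sketch of that pattern with stand-in types (not the leveldb-backed implementation):

// Generic sketch of the error-in-band producer pattern that
// CreateBlockChanFromLevelDB uses: results and errors share one channel,
// and closing the channel signals the end of iteration. Stand-in types,
// not the leveldb-backed implementation.
package main

import (
	"errors"
	"fmt"
)

type blockResult struct {
	Height uint64
	Bytes  []byte
	Err    error // set on the final message if iteration failed
}

func createBlockChan(start, end uint64, chanSize int) <-chan blockResult {
	ch := make(chan blockResult, chanSize) // buffered so the reader can lag
	go func() {
		defer close(ch) // consumers range until the channel closes
		for h := start; h <= end; h++ {
			if h == 4 { // simulate a read failure mid-iteration
				ch <- blockResult{Err: errors.New("missing block 4")}
				return
			}
			ch <- blockResult{Height: h, Bytes: []byte{byte(h)}}
		}
	}()
	return ch
}

func main() {
	for res := range createBlockChan(1, 10, 2) {
		if res.Err != nil {
			fmt.Println("iteration stopped:", res.Err)
			return
		}
		fmt.Println("got block", res.Height)
	}
}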