Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions pkg/e2eanalysis/e2e_analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@ import (
"context"
"encoding/xml"
"fmt"
"io/ioutil"
"os"
"regexp"
"sort"
"strings"
"time"

"path/filepath"

"github.com/openshift/origin/pkg/dataloader"
"github.com/openshift/origin/pkg/test"
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -117,6 +120,11 @@ func (tm *TestManager) GenerateReport(opt *Options) error {
filePrefix := "junit_e2e_analysis"
start := time.Now()
timeSuffix := fmt.Sprintf("_%s", start.UTC().Format("20060102-150405"))

// Write install duration metrics
installDurations := recordInstallDurations()
WriteDurations("install", installDurations, opt.JUnitDir, timeSuffix)

path := filepath.Join(opt.JUnitDir, fmt.Sprintf("%s_%s.xml", filePrefix, timeSuffix))
fmt.Fprintf(os.Stderr, "Writing JUnit report to %s\n", path)
err := os.WriteFile(path, test.StripANSI(out), 0640)
Expand Down Expand Up @@ -651,6 +659,52 @@ func getUnreadyOrUnschedulableNodeNames(allNodes *k8sv1.NodeList) []string {
return badNodeNames
}

// recordInstallDurations extracts installer timing metrics from the file
// install-duration.log inside the directory named by the SHARED_DIR
// environment variable.
//
// The first "Bootstrap Complete: <duration>" entry is returned under the key
// "install_bootstrap" and the first "Time elapsed: <duration>" entry under
// "install_overall". Durations are parsed with time.ParseDuration.
//
// The function is best-effort and never fails: when SHARED_DIR is unset, the
// log is missing or unreadable, or an entry cannot be parsed, the problem is
// logged and the affected metric is simply left out. The returned map is
// always non-nil.
func recordInstallDurations() map[string]time.Duration {
	metrics := make(map[string]time.Duration)

	sharedDir := os.Getenv("SHARED_DIR")
	if sharedDir == "" {
		logrus.Info("SHARED_DIR environment variable not set, skipping installation duration parsing.")
		return metrics
	}

	durationFile := filepath.Join(sharedDir, "install-duration.log")

	// Read the file directly rather than calling os.Stat first: a single call
	// avoids a check-then-use race, and the read error already tells us
	// whether the file is missing (informational) or unreadable (warning).
	content, err := ioutil.ReadFile(durationFile)
	switch {
	case os.IsNotExist(err):
		logrus.Infof("Install duration log not found at %s, skipping.", durationFile)
		return metrics
	case err != nil:
		logrus.WithError(err).Warnf("Failed to read install-duration.log at %s", durationFile)
		return metrics
	}
	logText := string(content)

	// Each entry describes one metric to pull out of the log. The capture
	// group holds the duration text handed to time.ParseDuration.
	entries := []struct {
		metric  string
		label   string // only used in the warning emitted on a parse failure
		pattern *regexp.Regexp
	}{
		{"install_bootstrap", "bootstrap", regexp.MustCompile(`Bootstrap Complete:\s+([\dhms]+)`)},
		{"install_overall", "total install", regexp.MustCompile(`Time elapsed:\s+([\dhms]+)`)},
	}

	for _, entry := range entries {
		match := entry.pattern.FindStringSubmatch(logText)
		if len(match) < 2 {
			// Entry not present in the log; nothing to record.
			continue
		}
		durationStr := match[1]
		duration, err := time.ParseDuration(durationStr)
		if err != nil {
			logrus.WithError(err).Warnf("Could not parse %s duration: %s", entry.label, durationStr)
			continue
		}
		metrics[entry.metric] = duration
	}
	return metrics
}

func objects(from *objx.Value) []objx.Map {
var values []objx.Map
switch {
Expand Down Expand Up @@ -683,3 +737,37 @@ func condition(cv objx.Map, condition string) objx.Map {
}
return objx.Map(nil)
}

// WriteDurations writes the given duration metrics to a data file in the
// autodl format.
//
// Every entry of metrics becomes one row holding the metric name and its
// duration in whole milliseconds; rows are ordered by metric name. The file is
// placed in artifactDir and named
// "duration-metrics-<name><timeSuffix>-<dataloader.AutoDataLoaderSuffix>".
// When metrics is empty no file is written. A write failure is logged as a
// warning and not returned to the caller.
func WriteDurations(name string, metrics map[string]time.Duration, artifactDir, timeSuffix string) {
	// Nothing to report, so do not produce a data file at all.
	if len(metrics) == 0 {
		return
	}

	// Map iteration order is not stable; order the metric names up front so
	// the rows come out the same way on every run.
	metricNames := make([]string, 0, len(metrics))
	for metricName := range metrics {
		metricNames = append(metricNames, metricName)
	}
	sort.Strings(metricNames)

	var rows []map[string]string
	for _, metricName := range metricNames {
		millis := metrics[metricName].Milliseconds()
		rows = append(rows, map[string]string{
			"name":     metricName,
			"duration": fmt.Sprintf("%d", millis),
		})
	}

	baseName := fmt.Sprintf("duration-metrics-%s%s-%s", name, timeSuffix, dataloader.AutoDataLoaderSuffix)
	fileName := filepath.Join(artifactDir, baseName)

	dataFile := dataloader.DataFile{
		TableName: "duration-metrics",
		Schema: map[string]dataloader.DataType{
			"name":     dataloader.DataTypeString,
			"duration": dataloader.DataTypeInteger,
		},
		Rows: rows,
	}

	logrus.Infof("Writing duration metrics to %s", fileName)
	err := dataloader.WriteDataFile(fileName, dataFile)
	if err != nil {
		logrus.WithError(err).Warnf("unable to write duration metric data file for %s: %s", name, fileName)
	}
}
8 changes: 8 additions & 0 deletions pkg/test/ginkgo/cmd_runsuite.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"github.com/openshift/origin/pkg/clioptions/clusterdiscovery"
"github.com/openshift/origin/pkg/clioptions/clusterinfo"
"github.com/openshift/origin/pkg/defaultmonitortests"
e2e_analysis "github.com/openshift/origin/pkg/e2eanalysis"
"github.com/openshift/origin/pkg/monitor"
monitorserialization "github.com/openshift/origin/pkg/monitor/serialization"
"github.com/openshift/origin/pkg/monitortestframework"
Expand Down Expand Up @@ -579,6 +580,12 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc
logrus.Infof("Retry strategy %s decided not to retry %d failing tests", o.RetryStrategy.Name(), fail)
}

endWithRetries := time.Now()
durationWithRetries := endWithRetries.Sub(start).Round(time.Second / 10)
if durationWithRetries > time.Minute {
durationWithRetries = durationWithRetries.Round(time.Second)
}

// monitor the cluster while the tests are running and report any detected anomalies
var syntheticTestResults []*junitapi.JUnitTestCase
var syntheticFailure bool
Expand Down Expand Up @@ -679,6 +686,7 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc
}

writeRunSuiteOptions(seed, totalNodes, workerNodes, parallelism, monitorTestInfo, o.JUnitDir, timeSuffix)
e2e_analysis.WriteDurations("e2e", map[string]time.Duration{"e2e": duration, "e2e_with_retries": durationWithRetries}, o.JUnitDir, timeSuffix)
}

switch {
Expand Down
2 changes: 2 additions & 0 deletions test/e2e/upgrade/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
"k8s.io/kubernetes/test/e2e/upgrades/apps"
"k8s.io/kubernetes/test/e2e/upgrades/node"

e2e_analysis "github.com/openshift/origin/pkg/e2eanalysis"
"github.com/openshift/origin/test/e2e/upgrade/adminack"
"github.com/openshift/origin/test/e2e/upgrade/dns"
"github.com/openshift/origin/test/e2e/upgrade/manifestdelete"
Expand Down Expand Up @@ -604,6 +605,7 @@ func clusterUpgrade(f *framework.Framework, c configv1client.Interface, dc dynam
// record whether the cluster was fast or slow upgrading. Don't fail the test, we still want signal on the actual tests themselves.
upgradeEnded := time.Now()
upgradeDuration := upgradeEnded.Sub(upgradeStarted)
e2e_analysis.WriteDurations("upgrade", map[string]time.Duration{"upgrade": upgradeDuration}, framework.TestContext.ReportDir, fmt.Sprintf("_%s", upgradeStarted.UTC().Format("20060102-150405")))
testCaseName := fmt.Sprintf("[sig-cluster-lifecycle] cluster upgrade should complete in a reasonable time")
failure := ""
if upgradeDuration > upgradeDurationLimit {
Expand Down