Skip to content

Commit c147732

Browse files
committed
metrics: add cluster_installer series
This series reports what installed the cluster. To do this, the operator reads the openshift-install ConfigMap, which the installer writes only when doing an IPI installation and which contains the invoker (e.g. username, Hive) as well as the openshift-install version. The absence of this ConfigMap implies a UPI installation. This data will allow us to determine whether a cluster was installed IPI or UPI and, once CI and Hive have been updated, whether the cluster was created by CI or Hive. Because the data in this series never changes, it should compress nicely, so we shouldn't have to worry about wasting space. If a field is missing from the ConfigMap's data, the corresponding label value is reported as `<missing>`. That was used instead of an empty value in order to distinguish between normal operation and an anomalous state: if the openshift-install ConfigMap exists, it will have all of those fields specified unless something were to erroneously remove them.
1 parent cb60da5 commit c147732

File tree

7 files changed

+191
-28
lines changed

7 files changed

+191
-28
lines changed

docs/dev/metrics.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,14 @@ cluster_version_payload{type="pending",version="4.0.3"} 1
5555
# HELP cluster_operator_payload_errors Report the number of errors encountered applying the image.
5656
# TYPE cluster_operator_payload_errors gauge
5757
cluster_operator_payload_errors{version="4.0.3"} 10
58-
```
58+
```
59+
60+
Metrics about the installation:
61+
62+
`cluster_installer` records information about the installation process. The type is either "openshift-install", indicating that `openshift-install` was used to install the cluster (IPI), or "", indicating that an unknown process installed the cluster (UPI). When `openshift-install` creates a cluster, it will also report its version and invoker; if either value is absent from the installer's ConfigMap, the corresponding label is reported as `<missing>` rather than as an empty string. The version is helpful for determining exactly which builds are being used to install (e.g. were they official builds or had they been modified). The invoker is "user" by default, but it may be overridden by a consuming tool (e.g. Hive, CI).
63+
64+
```
65+
# TYPE cluster_installer gauge
66+
cluster_installer{type="openshift-install",invoker="user",version="unreleased-master-1209-gfd08f44181f2111486749e2fb38399088f315cfb"} 1
67+
cluster_installer{type="",invoker="",version=""} 1
68+
```

pkg/cvo/cvo.go

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@ import (
1818
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1919
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
2020
"k8s.io/apimachinery/pkg/util/wait"
21+
informerscorev1 "k8s.io/client-go/informers/core/v1"
2122
"k8s.io/client-go/kubernetes"
2223
"k8s.io/client-go/kubernetes/scheme"
2324
coreclientsetv1 "k8s.io/client-go/kubernetes/typed/core/v1"
25+
listerscorev1 "k8s.io/client-go/listers/core/v1"
2426
"k8s.io/client-go/rest"
2527
"k8s.io/client-go/tools/cache"
2628
"k8s.io/client-go/tools/record"
@@ -35,8 +37,9 @@ import (
3537
"github.com/openshift/cluster-version-operator/lib/resourceapply"
3638
"github.com/openshift/cluster-version-operator/lib/resourcebuilder"
3739
"github.com/openshift/cluster-version-operator/lib/validation"
38-
"github.com/openshift/cluster-version-operator/pkg/cvo/internal"
40+
cvointernal "github.com/openshift/cluster-version-operator/pkg/cvo/internal"
3941
"github.com/openshift/cluster-version-operator/pkg/cvo/internal/dynamicclient"
42+
"github.com/openshift/cluster-version-operator/pkg/internal"
4043
"github.com/openshift/cluster-version-operator/pkg/payload"
4144
)
4245

@@ -101,6 +104,7 @@ type Operator struct {
101104

102105
cvLister configlistersv1.ClusterVersionLister
103106
coLister configlistersv1.ClusterOperatorLister
107+
cmLister listerscorev1.ConfigMapNamespaceLister
104108
proxyLister configlistersv1.ProxyLister
105109
cacheSynced []cache.InformerSynced
106110

@@ -136,6 +140,7 @@ func New(
136140
minimumInterval time.Duration,
137141
cvInformer configinformersv1.ClusterVersionInformer,
138142
coInformer configinformersv1.ClusterOperatorInformer,
143+
cmInformer informerscorev1.ConfigMapInformer,
139144
proxyInformer configinformersv1.ProxyInformer,
140145
client clientset.Interface,
141146
kubeClient kubernetes.Interface,
@@ -175,6 +180,8 @@ func New(
175180
optr.cvLister = cvInformer.Lister()
176181
optr.cacheSynced = append(optr.cacheSynced, cvInformer.Informer().HasSynced)
177182

183+
optr.cmLister = cmInformer.Lister().ConfigMaps(internal.ConfigNamespace)
184+
178185
if enableMetrics {
179186
if err := optr.registerMetrics(coInformer.Informer()); err != nil {
180187
panic(err)
@@ -515,7 +522,7 @@ type resourceBuilder struct {
515522
burstConfig *rest.Config
516523
modifier resourcebuilder.MetaV1ObjectModifierFunc
517524

518-
clusterOperators internal.ClusterOperatorsGetter
525+
clusterOperators cvointernal.ClusterOperatorsGetter
519526
}
520527

521528
// NewResourceBuilder creates the default resource builder implementation.
@@ -534,7 +541,7 @@ func (b *resourceBuilder) builderFor(m *lib.Manifest, state payload.State) (reso
534541
}
535542

536543
if b.clusterOperators != nil && m.GVK == configv1.SchemeGroupVersion.WithKind("ClusterOperator") {
537-
return internal.NewClusterOperatorBuilder(b.clusterOperators, *m), nil
544+
return cvointernal.NewClusterOperatorBuilder(b.clusterOperators, *m), nil
538545
}
539546
if resourcebuilder.Mapper.Exists(m.GVK) {
540547
return resourcebuilder.New(resourcebuilder.Mapper, config, *m)
@@ -543,7 +550,7 @@ func (b *resourceBuilder) builderFor(m *lib.Manifest, state payload.State) (reso
543550
if err != nil {
544551
return nil, err
545552
}
546-
return internal.NewGenericBuilder(client, *m)
553+
return cvointernal.NewGenericBuilder(client, *m)
547554
}
548555

549556
func (b *resourceBuilder) Apply(ctx context.Context, m *lib.Manifest, state payload.State) error {

pkg/cvo/cvo_test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/google/uuid"
1515
apiextv1beta1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
1616
apiextclientv1 "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1beta1"
17+
corev1 "k8s.io/api/core/v1"
1718
"k8s.io/apimachinery/pkg/api/errors"
1819
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1920
"k8s.io/apimachinery/pkg/labels"
@@ -152,6 +153,27 @@ func (r *coLister) Get(name string) (*configv1.ClusterOperator, error) {
152153
return nil, errors.NewNotFound(schema.GroupResource{}, name)
153154
}
154155

156+
// cmLister is a test fake for a ConfigMap lister backed by a fixed slice.
// Err is handed back by both List and Get; Items holds the ConfigMaps that
// List returns and that Get searches by name.
type cmLister struct {
157+
Err error
158+
Items []*corev1.ConfigMap
159+
}
160+
161+
// List returns every stored ConfigMap together with the configured Err.
// The selector argument is ignored, so no label filtering takes place.
func (l *cmLister) List(selector labels.Selector) ([]*corev1.ConfigMap, error) {
162+
return l.Items, l.Err
163+
}
164+
165+
// Get looks up a stored ConfigMap by name.
//
// If Err is set it is returned immediately with a nil ConfigMap. Otherwise the
// first item whose Name equals name is returned; when none matches, a NotFound
// API error (built with an empty GroupResource) is returned.
func (l *cmLister) Get(name string) (*corev1.ConfigMap, error) {
166+
// A configured error takes precedence over any stored items.
if l.Err != nil {
167+
return nil, l.Err
168+
}
169+
// Linear scan; the first name match wins.
for _, cm := range l.Items {
170+
if cm.Name == name {
171+
return cm, nil
172+
}
173+
}
174+
// No match: report NotFound so callers can test it with errors.IsNotFound.
return nil, errors.NewNotFound(schema.GroupResource{}, name)
175+
}
176+
155177
type crdLister struct {
156178
Err error
157179
Items []*apiextv1beta1.CustomResourceDefinition

pkg/cvo/metrics.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package cvo
33
import (
44
"time"
55

6+
apierrors "k8s.io/apimachinery/pkg/api/errors"
67
"k8s.io/apimachinery/pkg/util/sets"
78

89
"github.com/prometheus/client_golang/prometheus"
@@ -13,6 +14,7 @@ import (
1314
configv1 "github.com/openshift/api/config/v1"
1415

1516
"github.com/openshift/cluster-version-operator/lib/resourcemerge"
17+
"github.com/openshift/cluster-version-operator/pkg/internal"
1618
)
1719

1820
func (optr *Operator) registerMetrics(coInformer cache.SharedInformer) error {
@@ -33,6 +35,7 @@ type operatorMetrics struct {
3335
clusterOperatorUp *prometheus.GaugeVec
3436
clusterOperatorConditions *prometheus.GaugeVec
3537
clusterOperatorConditionTransitions *prometheus.GaugeVec
38+
clusterInstaller *prometheus.GaugeVec
3639
}
3740

3841
func newOperatorMetrics(optr *Operator) *operatorMetrics {
@@ -78,6 +81,10 @@ version for 'cluster', or empty for 'initial'.
7881
Name: "cluster_operator_condition_transitions",
7982
Help: "Reports the number of times that a condition on a cluster operator changes status",
8083
}, []string{"name", "condition"}),
84+
clusterInstaller: prometheus.NewGaugeVec(prometheus.GaugeOpts{
85+
Name: "cluster_installer",
86+
Help: "Reports info about the installation process and, if applicable, the install tool.",
87+
}, []string{"type", "version", "invoker"}),
8188
}
8289
}
8390

@@ -127,6 +134,7 @@ func (m *operatorMetrics) Describe(ch chan<- *prometheus.Desc) {
127134
ch <- m.clusterOperatorUp.WithLabelValues("", "").Desc()
128135
ch <- m.clusterOperatorConditions.WithLabelValues("", "", "").Desc()
129136
ch <- m.clusterOperatorConditionTransitions.WithLabelValues("", "").Desc()
137+
ch <- m.clusterInstaller.WithLabelValues("", "", "").Desc()
130138
}
131139

132140
func (m *operatorMetrics) Collect(ch chan<- prometheus.Metric) {
@@ -269,6 +277,27 @@ func (m *operatorMetrics) Collect(ch chan<- prometheus.Metric) {
269277
g.Set(float64(value))
270278
ch <- g
271279
}
280+
281+
installer, err := m.optr.cmLister.Get(internal.InstallerConfigMap)
282+
if err == nil {
283+
version := "<missing>"
284+
invoker := "<missing>"
285+
286+
if v, ok := installer.Data["version"]; ok {
287+
version = v
288+
}
289+
if i, ok := installer.Data["invoker"]; ok {
290+
invoker = i
291+
}
292+
293+
g := m.clusterInstaller.WithLabelValues("openshift-install", version, invoker)
294+
g.Set(1.0)
295+
ch <- g
296+
} else if apierrors.IsNotFound(err) {
297+
g := m.clusterInstaller.WithLabelValues("", "", "")
298+
g.Set(1.0)
299+
ch <- g
300+
}
272301
}
273302

274303
// mostRecentTimestamp finds the most recent change recorded to the status and

0 commit comments

Comments
 (0)