diff --git a/content/en/docs/getting-started/administration/assets/grafana-sample.json b/content/en/docs/getting-started/administration/assets/grafana-sample.json index c028dfd55..8be91456a 100644 --- a/content/en/docs/getting-started/administration/assets/grafana-sample.json +++ b/content/en/docs/getting-started/administration/assets/grafana-sample.json @@ -1,342 +1,377 @@ { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 27, - "links": [], - "liveNow": false, - "panels": [ + "annotations": { + "list": [ { + "builtIn": 1, "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "type": "grafana", + "uid": "-- Grafana --" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 30, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - 
"tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 3, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "rate(apiserver_request_total{resource=~\"managedclusters|managedclusteraddons|managedclustersetbindings|managedclustersets|addonplacementscores|placementdecisions|placements|manifestworks|manifestworkreplicasets\"}[1m])", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "API Request Count", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - 
"mode": "palette-classic" + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "rate(apiserver_request_total{resource=~\"managedclusters|managedclusteraddons|managedclustersetbindings|managedclustersets|addonplacementscores|placementdecisions|placements|manifestworks|manifestworkreplicasets|appliedmanifestworks|clusterclaims|klusterlets\",cluster_name=\"$cluster\"}[1m])", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "API Request Count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], 
- "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "rate(container_cpu_usage_seconds_total{namespace=~\"open-cluster-management.*\"}[3m])", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "CPU Usage", - "transformations": [], - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "rate(container_cpu_usage_seconds_total{namespace=~\"open-cluster-management.*\",cluster_name=\"$cluster\"}[3m])", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + 
"custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "bytes" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [] + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 18 + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "tooltip": { + "hideZeros": false, + "mode": 
"single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "editorMode": "code", + "expr": "container_memory_working_set_bytes{namespace=~\"open-cluster-management.*\",cluster_name=\"$cluster\"}", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "", + "schemaVersion": 42, + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "prometheus" }, - "targets": [ + "includeAll": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": "Hub", + "current": { + "text": "hub", + "value": "hub" + }, + "definition": "label_values(container_memory_working_set_bytes{job=\"opentelemetry-collector\"},cluster_name)", + "includeAll": false, + "label": "Cluster", + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(container_memory_working_set_bytes{job=\"opentelemetry-collector\"},cluster_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "staticOptions": [ { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "container_memory_working_set_bytes{namespace=~\"open-cluster-management.*\"}", - "legendFormat": "__auto", - "range": true, - "refId": "A" + "text": "hub", + "value": "hub" } ], - "title": "Memory Usage", - "transformations": [], - "type": "timeseries" + "type": "query" } - ], - "refresh": "", - "schemaVersion": 38, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "prometheus", - "value": "prometheus" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": 
"datasource", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "OCM", - "uid": "c1c387c4-6ef9-4b14-8435-69f6b0b409e9", - "version": 5, - "weekStart": "" - } \ No newline at end of file + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "OCM", + "uid": "c1c387c4-6ef9-4b14-8435-69f6b0b409e9", + "version": 1 +} \ No newline at end of file diff --git a/content/en/docs/getting-started/administration/monitoring.md b/content/en/docs/getting-started/administration/monitoring.md index 59782b9dd..85ec92790 100644 --- a/content/en/docs/getting-started/administration/monitoring.md +++ b/content/en/docs/getting-started/administration/monitoring.md @@ -1,40 +1,59 @@ --- -title: Monitoring OCM using Prometheus-Operator +title: Monitoring OCM using OpenTelemetry Collector Addon and Prometheus-Operator weight: 1 --- -In this page, we provide a way to monitor your OCM environment using Prometheus-Operator. +In this page, we provide a way to monitor your OCM environment using OpenTelemetry Collector Addon and Prometheus-Operator. +## Overview +The OpenTelemetry Collector Addon is a pluggable addon for Open Cluster Management (OCM) that automates the deployment and management of OpenTelemetry collector on managed clusters. Built on the addon-framework, it provides observability and metrics collection capabilities across your multi-cluster environment. 
+``` +┌─────────────────┐ ┌──────────────────┐ ┌──────────────────┐ +│ Hub Cluster │ │ Managed Cluster │ │ Managed Cluster │ +│ │ │ │ │ │ +│ ┌─────────────┐ │ │ ┌──────────────┐ │ │ ┌──────────────┐ │ +│ │ Prometheus │ │◄───┤ │ OTEL │ │ │ │ OTEL │ │ +│ │ (Remote │ │ │ │ Collector │ │ │ │ Collector │ │ +│ │ Write) │ │ │ │ │ │ │ │ │ │ +│ └─────────────┘ │ │ └──────────────┘ │ │ └──────────────┘ │ +│ │ │ │ │ │ │ │ +│ ┌─────────────┐ │ │ ┌──────▼──────┐ │ │ ┌──────▼──────┐ │ +│ │ OTEL Addon │ │ │ │ Node Metrics│ │ │ │ Node Metrics│ │ +│ │ Manager │ │ │ │ cAdvisor │ │ │ │ cAdvisor │ │ +│ └─────────────┘ │ │ └─────────────┘ │ │ └─────────────┘ │ +└─────────────────┘ └──────────────────┘ └──────────────────┘ +``` ## Before you get started -You must have an OCM environment set up. You can also follow our recommended [quick start guide]({{< ref "docs/getting-started/quick-start" >}}) to set up a playground OCM environment. +1. You must have an OCM environment set up. You can also follow our recommended [quick start guide]({{< ref "docs/getting-started/quick-start" >}}) to set up a playground OCM environment. +2. You must have Helm 3.x installed. +3. You have kubectl configured to access your hub cluster. -And then please [install the Prometheus-Operator](https://prometheus-operator.dev/docs/prologue/quick-start/) in your hub cluster. You can also run the following commands copied from the official doc: +And then install the OpenTelemetry addon with automatic Prometheus stack and certificate generation: ```shell -git clone https://github.com/prometheus-operator/kube-prometheus.git -cd kube-prometheus - -# Create the namespace and CRDs, and then wait for them to be availble before creating the remaining resources -kubectl create -f manifests/setup +# Clone the repository +git clone https://github.com/open-cluster-management-io/addon-contrib.git +cd addon-contrib/open-telemetry-addon -# Wait until the "servicemonitors" CRD is created. 
The message "No resources found" means success in this context. -until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done - -kubectl create -f manifests/ +# Install everything (certificates, prometheus, addon) +kubectl config use-context kind-hub # run it against the hub cluster +make install-all ``` +Refer to the [OpenTelemetry Collector Addon README](https://github.com/open-cluster-management-io/addon-contrib/tree/main/open-telemetry-addon#configuration) for more configuration options and verification steps. -## Monitoring the control-plane resource usage. +## Monitoring the control-plane resource usage -You can use `kubectl proxy` to open prometheus UI in your browser on [localhost:9090](http://localhost:9090/): +You can use `kubectl port-forward` to forward the Prometheus service to your localhost: ```shell -kubectl --namespace monitoring port-forward svc/prometheus-k8s 9090 +kubectl --namespace monitoring port-forward svc/prometheus-stack-kube-prom-prometheus 9090 ``` +Then, open your browser and navigate to [http://localhost:9090](http://localhost:9090) to access the Prometheus UI. -The following queries are to monitor the control-plane pods' cpu usage, memory usage and apirequestcount for critical CRs: +The following queries are to monitor the control-plane pods' cpu usage, memory usage and apirequestcount: ```shell rate(container_cpu_usage_seconds_total{namespace=~"open-cluster-management.*"}[3m]) @@ -55,15 +74,18 @@ We provide an initial grafana dashboard for you to visualize the metrics. But yo First, use the following command to proxy grafana service: ```shell -kubectl --namespace monitoring port-forward svc/grafana 3000 +kubectl --namespace monitoring port-forward svc/prometheus-stack-grafana 3000:80 ``` -Next, open the grafana UI in your browser on [localhost:3000](http://localhost:3000/login). +Next, open the grafana UI in your browser on [localhost:3000](http://localhost:3000/login).
Get the Grafana 'admin' user password by running: +```shell +kubectl --namespace monitoring get secrets prometheus-stack-grafana -o jsonpath="{.data.admin-password}" | base64 -d ; echo +``` Click the "Import Dashboard" and run the following command to copy a sample dashboard and paste it to the grafana: ```shell -curl https://raw.githubusercontent.com/open-cluster-management-io/open-cluster-management-io.github.io/main/content/en/getting-started/administration/assets/grafana-sample.json | pbcopy +curl https://raw.githubusercontent.com/open-cluster-management-io/open-cluster-management-io.github.io/main/content/en/docs/getting-started/administration/assets/grafana-sample.json | pbcopy ``` Then, you will get a sample grafana dashboard that you can fine-tune further: diff --git a/static/sample-grafana.png b/static/sample-grafana.png index 8d26b1515..2c5becfe8 100644 Binary files a/static/sample-grafana.png and b/static/sample-grafana.png differ