Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
Prev Previous commit
Next Next commit
Add Macos observability lib (#28)
* Add gitignore to node-observ-lib

* Fix typo in node default filteringSelector

* Prep alert group names for macos

* Add macos-observ-lib

* Change overview dashboard:
show networkErrorsAndDroppedPerSec instead of networkErrorPerSec for Linux/MacOS

* Add more alerts

* Move alerts to sep file

* Breaking: Update layout

To allow to locally import linux from macos

* Bring back NodeFilesystemAlmostOutOfFiles alert

* Show only errors when they occur

* Only show network interfaces that had traffic change at least once during selected dashboard interval
  • Loading branch information
v-zhuravlev authored Nov 28, 2023
commit 94e744ecdd595e3dbe425d6671b1d96dd2d58f7b
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
.jekyll-cache
jsonnetfile.lock.json
vendor
12 changes: 6 additions & 6 deletions docs/node-observ-lib/jsonnetfile.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@
{
"source": {
"git": {
"remote": "https://github.com/grafana/jsonnet-libs.git",
"subdir": "common-lib"
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-v10.0.0"
}
},
"version": "master"
"version": "main"
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-v10.0.0"
"remote": "https://github.com/grafana/jsonnet-libs.git",
"subdir": "common-lib"
}
},
"version": "main"
"version": "master"
},
{
"source": {
Expand Down
56 changes: 0 additions & 56 deletions docs/node-observ-lib/jsonnetfile.lock.json

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ You can use observ-lib to fill in monitoring-mixin structure:

```jsonnet
// mixin.libsonnet file
local nodelib = import 'node-observ-lib/main.libsonnet';
local nodelib = import 'node-observ-lib/linux/main.libsonnet';

local linux =
nodelib.new()
Expand Down Expand Up @@ -45,7 +45,7 @@ local linux =

```jsonnet
// mixin.libsonnet file
local nodelib = import 'node-observ-lib/main.libsonnet';
local nodelib = import 'node-observ-lib/linux/main.libsonnet';

local linux =
nodelib.new()
Expand Down Expand Up @@ -82,7 +82,7 @@ local linux =

// mixin.libsonnet file
local configOverride = import './overrides.libsonnet';
local nodelib = import 'node-observ-lib/main.libsonnet';
local nodelib = import 'node-observ-lib/linux/main.libsonnet';

local linux =
nodelib.new()
Expand All @@ -101,7 +101,7 @@ local linux =
```jsonnet
local g = import './g.libsonnet';
// mixin.libsonnet file
local nodelib = import 'node-observ-lib/main.libsonnet';
local nodelib = import 'node-observ-lib/linux/main.libsonnet';

local linux =
nodelib.new()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
new(this): {
groups: [
{
name: if this.config.uid == 'node' then 'node-exporter-filesystem' else this.config.uid + '-linux-filesystem-alerts',
name: if this.config.uid == 'node' then 'node-exporter-filesystem' else this.config.uid + '-filesystem-alerts',
rules: [
{
alert: 'NodeFilesystemSpaceFillingUp',
Expand Down Expand Up @@ -160,7 +160,7 @@
},
{
// defaults to 'node-exporter' for backward compatibility with the old node-mixin
name: if this.config.uid == 'node' then 'node-exporter' else this.config.uid + '-linux-alerts',
name: if this.config.uid == 'node' then 'node-exporter' else this.config.uid + '-alerts',
rules: [
{
alert: 'NodeNetworkReceiveErrs',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local g = import './g.libsonnet';
local g = import '../g.libsonnet';
local commonlib = import 'common-lib/common/main.libsonnet';
{
new(this):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// 'instanceLabels' - one or more labels that can be used to identify single entity of instances. In simple cases, can be 'instance' or 'pod'.
// 'uid' - UID to prefix all dashboards original uids

filteringSelector: std.get(self, 'nodeExporterSelector', default='"job="node"'),
filteringSelector: std.get(self, 'nodeExporterSelector', default='job="node"'),
groupLabels: ['job'],
instanceLabels: ['instance'],
dashboardNamePrefix: 'Node exporter / ',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local g = import './g.libsonnet';
local g = import '../g.libsonnet';
local logslib = import 'github.com/grafana/jsonnet-libs/logs-lib/logs/main.libsonnet';
{
local root = self,
Expand Down Expand Up @@ -59,7 +59,7 @@ local logslib = import 'github.com/grafana/jsonnet-libs/logs-lib/logs/main.libso
panels.diskUsage { gridPos+: { w: 12, h: 8 } },
g.panel.row.new('Network'),
panels.networkUsagePerSec { gridPos+: { w: 12, h: 8 } },
panels.networkErrorsPerSec { gridPos+: { w: 12, h: 8 } },
panels.networkErrorsAndDroppedPerSec { gridPos+: { w: 12, h: 8 } },
], 6, 2
)
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local g = import './g.libsonnet';
local g = import '../g.libsonnet';
local commonlib = import 'common-lib/common/main.libsonnet';
{
new(this):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local g = import './g.libsonnet';
local g = import '../g.libsonnet';
local commonlib = import 'common-lib/common/main.libsonnet';
local utils = commonlib.utils;
{
Expand Down Expand Up @@ -680,12 +680,18 @@ local utils = commonlib.utils;
networkErrorsAndDroppedPerSec:
commonlib.panels.network.timeSeries.errors.new(
'Network errors and dropped packets',
targets=[
t.networkOutErrorsPerSec,
t.networkInErrorsPerSec,
t.networkOutDroppedPerSec,
t.networkInDroppedPerSec,
],
targets=std.map(
function(t) t
{
expr: t.expr + '>0',
},
[
t.networkOutErrorsPerSec,
t.networkInErrorsPerSec,
t.networkOutDroppedPerSec,
t.networkInDroppedPerSec,
]
),
description=|||
**Network errors**:

Expand All @@ -711,7 +717,7 @@ local utils = commonlib.utils;
targets=std.map(
function(t) t
{
expr: 'topk(25, ' + t.expr + ')>0.5',
expr: 'topk(25, ' + t.expr + ')>0',
legendFormat: '{{' + this.config.instanceLabels[0] + '}}: ' + std.get(t, 'legendFormat', '{{ nic }}'),
},
[
Expand Down Expand Up @@ -757,7 +763,7 @@ local utils = commonlib.utils;
+ commonlib.panels.network.timeSeries.errors.withNegateOutPackets(),
networkUsagePerSec:
commonlib.panels.network.timeSeries.traffic.new(
targets=[t.networkInBitPerSec, t.networkOutBitPerSec]
targets=[t.networkInBitPerSecFiltered, t.networkOutBitPerSecFiltered]
)
+ commonlib.panels.network.timeSeries.traffic.withNegateOutPackets(),
networkPacketsPerSec:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local g = import './g.libsonnet';
local g = import '../g.libsonnet';
local prometheusQuery = g.query.prometheus;
local lokiQuery = g.query.loki;

Expand Down Expand Up @@ -696,6 +696,34 @@ local lokiQuery = g.query.loki;
'irate(node_network_receive_bytes_total{%(queriesSelector)s}[$__rate_interval])*8' % variables
)
+ prometheusQuery.withLegendFormat('{{ device }} received'),
networkOutBitPerSecFiltered:
prometheusQuery.new(
prometheusDatasource,
|||
irate(node_network_transmit_bytes_total{%(queriesSelector)s}[$__rate_interval])*8
# only show interfaces that had traffic change at least once during selected dashboard interval:
and
increase(
node_network_transmit_bytes_total{%(queriesSelector)s}[$__range]
) > 0
||| % variables
)
+ prometheusQuery.withLegendFormat('{{ device }} transmitted'),
networkInBitPerSecFiltered:
prometheusQuery.new(
prometheusDatasource,
|||
irate(node_network_receive_bytes_total{%(queriesSelector)s}[$__rate_interval])*8
# only show interfaces that had traffic change at least once during selected dashboard interval:
and
increase(
node_network_receive_bytes_total{%(queriesSelector)s}[$__range]
) > 0
||| % variables
)
+ prometheusQuery.withLegendFormat('{{ device }} received'),


networkOutErrorsPerSec:
prometheusQuery.new(
prometheusDatasource,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// variables.libsonnet
local g = import './g.libsonnet';
local g = import '../g.libsonnet';
local var = g.dashboard.variable;
local commonlib = import 'common-lib/common/main.libsonnet';
local utils = commonlib.utils;
Expand Down
86 changes: 86 additions & 0 deletions docs/node-observ-lib/macos/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# MacOS exporter observability lib

This jsonnet observability lib can be used to generate an observability package for node exporter (MacOS).

## Import

```sh
jb init
jb install https://github.com/grafana/node_exporter/docs/node-observ-lib
```

## Examples

### Example 1: Basic example

You can use observ-lib to fill in the monitoring-mixin structure:

```jsonnet
// mixin.libsonnet file
local macoslib = import 'node-observ-lib/macos/main.libsonnet';

local mac =
macoslib.new()
+ macoslib.withConfigMixin({
filteringSelector: 'job=~".*mac.*"',
groupLabels: ['job'],
instanceLabels: ['instance'],
dashboardNamePrefix: 'MacOS / ',
dashboardTags: ['macos-mixin'],
uid: 'darwin',
// enable loki logs
enableLokiLogs: true,
});

{
grafanaDashboards+:: mac.grafana.dashboards,
prometheusAlerts+:: mac.prometheus.alerts,
prometheusRules+:: mac.prometheus.recordingRules,
}

```
For more examples see [node-observ-lib/linux](../linux).

## Collectors used:

Grafana Agent, or a combination of node_exporter and promtail, can be used to collect the required data.

### Logs collection

Loki logs are used to populate the logs dashboard and also for annotations.

To use logs, you need to opt in by setting `enableLokiLogs: true` in the config.

See the example above.

The following scrape snippet can be used in grafana-agent/promtail:

```yaml
- job_name: integrations/node_exporter_direct_scrape
static_configs:
- targets:
- localhost
labels:
__path__: /var/log/*.log
instance: '<your-instance-name>'
job: integrations/macos-node
pipeline_stages:
- multiline:
firstline: '^([\w]{3} )?[\w]{3} +[\d]+ [\d]+:[\d]+:[\d]+|[\w]{4}-[\w]{2}-[\w]{2} [\w]{2}:[\w]{2}:[\w]{2}(?:[+-][\w]{2})?'
- regex:
expression: '(?P<timestamp>([\w]{3} )?[\w]{3} +[\d]+ [\d]+:[\d]+:[\d]+|[\w]{4}-[\w]{2}-[\w]{2} [\w]{2}:[\w]{2}:[\w]{2}(?:[+-][\w]{2})?) (?P<hostname>\S+) (?P<sender>.+?)\[(?P<pid>\d+)\]:? (?P<message>(?s:.*))$'
- labels:
sender:
hostname:
pid:
- match:
selector: '{sender!="", pid!=""}'
stages:
- template:
source: message
template: '{{ .sender }}[{{ .pid }}]: {{ .message }}'
- labeldrop:
- pid
- output:
source: message
```
23 changes: 23 additions & 0 deletions docs/node-observ-lib/macos/alerts.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
  // Builds the alert groups object from the groups of a parent library.
  //   this: library instance; `this.config.alertsMacKeep` is read as the
  //         array of alert names that should be kept.
  //   parentPrometheus: object whose `alerts.groups` supplies the source groups.
  // Returns `{ groups: [...] }`, where each group carries only `name` and the
  // kept `rules`; groups left with no rules are omitted.
  new(this, parentPrometheus):
    // A rule is kept when its alert name occurs in alertsMacKeep.
    local isKept(rule) =
      std.length(std.find(rule.alert, this.config.alertsMacKeep)) > 0;

    // Reduced copy of a group: same name, only the kept rules.
    local trim(group) = {
      name: group.name,
      rules: std.filter(isKept, group.rules),
    };

    local hasRules(group) = std.length(group.rules) > 0;

    {
      groups: std.filter(
        hasRules,
        std.map(trim, parentPrometheus.alerts.groups)
      ),
    },
}
Loading