diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 99c58abaf7..d3b485710f 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -2830,6 +2830,9 @@ node_pressure_io_stalled_seconds_total 159.229614 # HELP node_pressure_io_waiting_seconds_total Total time in seconds that processes have waited due to IO congestion # TYPE node_pressure_io_waiting_seconds_total counter node_pressure_io_waiting_seconds_total 159.886802 +# HELP node_pressure_irq_stalled_seconds_total Total time in seconds no process could make progress due to IRQ congestion +# TYPE node_pressure_irq_stalled_seconds_total counter +node_pressure_irq_stalled_seconds_total 0.008494 # HELP node_pressure_memory_stalled_seconds_total Total time in seconds no process could make progress due to memory congestion # TYPE node_pressure_memory_stalled_seconds_total counter node_pressure_memory_stalled_seconds_total 0 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index d52cb99d04..1a6448fb2d 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -2852,6 +2852,9 @@ node_pressure_io_stalled_seconds_total 159.229614 # HELP node_pressure_io_waiting_seconds_total Total time in seconds that processes have waited due to IO congestion # TYPE node_pressure_io_waiting_seconds_total counter node_pressure_io_waiting_seconds_total 159.886802 +# HELP node_pressure_irq_stalled_seconds_total Total time in seconds no process could make progress due to IRQ congestion +# TYPE node_pressure_irq_stalled_seconds_total counter +node_pressure_irq_stalled_seconds_total 0.008494 # HELP node_pressure_memory_stalled_seconds_total Total time in seconds no process could make progress due to memory congestion # TYPE node_pressure_memory_stalled_seconds_total counter node_pressure_memory_stalled_seconds_total 0 diff --git a/collector/fixtures/proc/pressure/irq b/collector/fixtures/proc/pressure/irq new file mode 100644 index 0000000000..76059c7572 --- /dev/null +++ b/collector/fixtures/proc/pressure/irq @@ -0,0 +1 @@ +full avg10=0.00 avg60=0.00 avg300=0.00 total=8494 \ No newline at end of file diff --git a/collector/pressure_linux.go b/collector/pressure_linux.go index 4dbdc5393d..ac25d7c12c 100644 --- a/collector/pressure_linux.go +++ b/collector/pressure_linux.go @@ -29,7 +29,7 @@ import ( ) var ( - psiResources = []string{"cpu", "io", "memory"} + psiResources = []string{"cpu", "io", "memory", "irq"} ) type pressureStatsCollector struct { @@ -38,6 +38,7 @@ type pressureStatsCollector struct { ioFull *prometheus.Desc mem *prometheus.Desc memFull *prometheus.Desc + irqFull *prometheus.Desc fs procfs.FS @@ -81,6 +82,11 @@ func NewPressureStatsCollector(logger log.Logger) (Collector, error) { "Total time in seconds no process could make progress due to memory congestion", nil, nil, ), + irqFull: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "pressure", "irq_stalled_seconds_total"), + "Total time in seconds no process could make progress due to IRQ congestion", + nil, nil, + ), fs: fs, logger: logger, }, nil @@ -102,7 +108,9 @@ func (c *pressureStatsCollector) Update(ch chan<- prometheus.Metric) error { } return fmt.Errorf("failed to retrieve pressure stats: %w", err) } - if vals.Some == nil { + // IRQ pressure does not have 'some' data. + // See https://github.com/torvalds/linux/blob/v6.9/include/linux/psi_types.h#L65 + if vals.Some == nil && res != "irq" { level.Debug(c.logger).Log("msg", "pressure information returned no 'some' data") return ErrNoData } @@ -119,6 +127,8 @@ func (c *pressureStatsCollector) Update(ch chan<- prometheus.Metric) error { case "memory": ch <- prometheus.MustNewConstMetric(c.mem, prometheus.CounterValue, float64(vals.Some.Total)/1000.0/1000.0) ch <- prometheus.MustNewConstMetric(c.memFull, prometheus.CounterValue, float64(vals.Full.Total)/1000.0/1000.0) + case "irq": + ch <- prometheus.MustNewConstMetric(c.irqFull, prometheus.CounterValue, float64(vals.Full.Total)/1000.0/1000.0) default: level.Debug(c.logger).Log("msg", "did not account for resource", "resource", res) }