Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
9f9c439
Adds a New Condition for PVC Resize Errors
andrewlecuyer Feb 7, 2025
a942197
Initial configuration for an OpenTelemetry Collector
cbandy Dec 23, 2024
3ea8f17
Add an OTel Collector with Patroni metrics
dsessler7 Jan 6, 2025
c3a98fb
Add PgBouncer metrics
dsessler7 Jan 14, 2025
9fcef77
Parse Postgres and pgAudit logs using the OTel Collector
benjaminjb Jan 22, 2025
08ab9a4
Parse Patroni logs
dsessler7 Jan 22, 2025
2e59c1b
Parse PgBouncer logs using the OTel Collector
dsessler7 Jan 29, 2025
96e1ffb
Scrape pgAdmin logs using the OTel collector
tony-landreth Jan 29, 2025
ee9bf60
Add pgBackRest repohost log collector
benjaminjb Feb 1, 2025
836572d
Validate and strip/minify Collector SQL files
cbandy Feb 7, 2025
f2a80ac
Change pgbackrest init for running containers
benjaminjb Feb 7, 2025
0dcb1be
Bump controller-gen to v0.17.2
cbandy Feb 10, 2025
fbb4f32
Change PostgresIdentifier to a type alias
cbandy Jan 3, 2025
7089149
Add k8s attributes to patroni logs. Add CompactingProcessor to patron…
dsessler7 Feb 7, 2025
8e37a1f
Create initial API for OTel instrumentation. Allow users to configure…
dsessler7 Feb 9, 2025
38fc33a
Add instrumentation_scope.name and log.record.original attributes to …
dsessler7 Feb 9, 2025
3602c70
Add configurable collector (#4092)
benjaminjb Feb 12, 2025
f7e9625
Add shared functions for quoting shell words
cbandy Nov 4, 2024
d4483cc
Add a function for setting permission on directories
cbandy Feb 10, 2025
e6ea78b
Store pgAdmin log file positions in the logs directory
cbandy Feb 6, 2025
951fa40
Ensure Postgres and Patroni log directories are writable
cbandy Feb 10, 2025
88130ca
Ensure pgBackRest log directories are writable
cbandy Feb 11, 2025
8dbe427
Add a field specifying when to delete log files
cbandy Feb 14, 2025
1797f8f
Rotate PgBouncer logs using specified retention
dsessler7 Feb 11, 2025
8b87822
Document a Kubernetes bug with the duration format
cbandy Feb 18, 2025
85636a8
Add an API struct representing a single Secret value
cbandy Jan 15, 2025
ef1eae0
Allow more control over the arguments to pg_upgrade
cbandy Dec 9, 2024
510ddf4
Validate pg_upgrade versions at the API server
cbandy Feb 19, 2025
e4dfdf2
Add a validated field for Postgres parameters
cbandy Dec 20, 2024
e884806
Otel pgMonitor metrics (#4096)
tony-landreth Feb 21, 2025
00c9068
Add reload logic to collector container start script.
dsessler7 Feb 19, 2025
19a28f7
Add a test helper that unmarshals JSON and YAML
cbandy Feb 26, 2025
9977db2
If the OpenTelemetryLogs feature gate is set, tell patroni to log to …
dsessler7 Feb 26, 2025
bfd4160
Add resources from API to OTEL sidecar (#4104)
benjaminjb Feb 26, 2025
6ba9057
Change PostgresCluster.spec.config to a pointer
cbandy Feb 26, 2025
2a2fe9b
Calculate Postgres parameters in the controller
cbandy Feb 26, 2025
9018342
Rotate postgres logs according to retentionPeriod in spec.
dsessler7 Feb 20, 2025
d04885c
Clone embedded metrics variable to avoid continuous appending.
dsessler7 Feb 28, 2025
00a93f6
Add a script to help with bumping dependencies
cbandy Feb 28, 2025
6dbbf9b
Bump golang.org/x/crypto and golang.org/x/oauth2
cbandy Feb 28, 2025
b50bae9
Rotate pgbackrest (#4108)
benjaminjb Mar 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add PgBouncer metrics
A generator converts YAML with comments to JSON ahead of time, avoiding
parse errors at runtime and keeping comments out of the binary.

Co-authored-by: Chris Bandy <[email protected]>
Issue: PGO-2054
  • Loading branch information
dsessler7 and cbandy committed Feb 7, 2025
commit c3a98fb33db90c35058157cf77090e26c1d996a5
14 changes: 9 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -254,16 +254,16 @@ generate-kuttl: ## Generate kuttl tests
##@ Generate

.PHONY: check-generate
check-generate: ## Check crd, deepcopy functions, and rbac generation
check-generate: generate-crd
check-generate: generate-deepcopy
check-generate: generate-rbac
check-generate: ## Check everything generated is also committed
check-generate: generate
git diff --exit-code -- config/crd
git diff --exit-code -- config/rbac
git diff --exit-code -- internal/collector
git diff --exit-code -- pkg/apis

.PHONY: generate
generate: ## Generate crd, deepcopy functions, and rbac
generate: ## Generate everything
generate: generate-collector
generate: generate-crd
generate: generate-deepcopy
generate: generate-rbac
Expand All @@ -276,6 +276,10 @@ generate-crd: tools/controller-gen
paths='./pkg/apis/...' \
output:dir='config/crd/bases' # {directory}/{group}_{plural}.yaml

.PHONY: generate-collector
generate-collector: ## Generate OTel Collector files
$(GO) generate ./internal/collector

.PHONY: generate-deepcopy
generate-deepcopy: ## Generate DeepCopy functions
generate-deepcopy: tools/controller-gen
Expand Down
48 changes: 48 additions & 0 deletions internal/collector/generate_json.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright 2024 - 2025 Crunchy Data Solutions, Inc.
//
// SPDX-License-Identifier: Apache-2.0

//go:build generate

//go:generate go run generate_json.go

package main

import (
"bytes"
"log/slog"
"os"
"path/filepath"
"strings"

"sigs.k8s.io/yaml"
)

// main converts every regular *.yaml file in the current working directory
// to minified JSON files of the same base name under the "generated"
// directory. It panics (via need/must) on any error, which is acceptable
// for a build-time generator.
func main() {
	cwd := need(os.Getwd())

	slog.Info("Reading", "directory", cwd)

	// Collect the names of regular files ending in ".yaml".
	var names []string
	for _, entry := range need(os.ReadDir(cwd)) {
		name := entry.Name()
		if entry.Type() == 0 && strings.HasSuffix(name, ".yaml") {
			names = append(names, name)
		}
	}

	for _, name := range names {
		slog.Info("Reading", "file", name)

		// Strict conversion rejects duplicate keys and unknown YAML constructs.
		data := need(yaml.YAMLToJSONStrict(need(os.ReadFile(name))))
		target := filepath.Join("generated", strings.TrimSuffix(name, ".yaml")+".json")

		slog.Info("Writing", "file", target)
		must(os.WriteFile(target, append(bytes.TrimSpace(data), '\n'), 0o644))
	}
}

// need returns v when err is nil and panics otherwise. It keeps the
// generator terse by collapsing (value, error) pairs into a single value.
func need[V any](v V, err error) V {
	if err == nil {
		return v
	}
	panic(err)
}

// must panics when err is non-nil; for calls that return only an error.
func must(err error) { need(0, err) }
2 changes: 2 additions & 0 deletions internal/collector/generated/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# https://docs.github.com/en/repositories/working-with-files/managing-files/customizing-how-changed-files-appear-on-github
/*.json linguist-generated=true

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions internal/collector/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func AddToPod(
inInstanceConfigMap *corev1.ConfigMap,
outPod *corev1.PodSpec,
volumeMounts []corev1.VolumeMount,
sqlQueryPassword string,
) {
if !feature.Enabled(ctx, feature.OpenTelemetryMetrics) {
return
Expand Down Expand Up @@ -69,6 +70,12 @@ func AddToPod(
Image: "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.116.1",
ImagePullPolicy: inCluster.Spec.ImagePullPolicy,
Command: []string{"/otelcol-contrib", "--config", "/etc/otel-collector/config.yaml"},
Env: []corev1.EnvVar{
{
Name: "PGPASSWORD",
Value: sqlQueryPassword,
},
},

SecurityContext: initialize.RestrictedSecurityContext(),
VolumeMounts: append(volumeMounts, configVolumeMount),
Expand Down
1 change: 1 addition & 0 deletions internal/collector/naming.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ const OneSecondBatchProcessor = "batch/1s"
const SubSecondBatchProcessor = "batch/200ms"
const Prometheus = "prometheus"
const Metrics = "metrics"
const SqlQuery = "sqlquery"
53 changes: 53 additions & 0 deletions internal/collector/patroni.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright 2024 - 2025 Crunchy Data Solutions, Inc.
//
// SPDX-License-Identifier: Apache-2.0

package collector

import (
"context"

"github.com/crunchydata/postgres-operator/internal/feature"
"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
)

// EnablePatroniMetrics adds a Prometheus receiver that scrapes Patroni's
// metrics endpoint, a Prometheus exporter, and a metrics pipeline joining
// the two. It does nothing unless the OpenTelemetryMetrics feature gate
// is enabled.
func EnablePatroniMetrics(ctx context.Context,
	inCluster *v1beta1.PostgresCluster,
	outConfig *Config,
) {
	if !feature.Enabled(ctx, feature.OpenTelemetryMetrics) {
		return
	}

	// Patroni serves its metrics over HTTPS; certificate verification is
	// skipped because the certificate is not expected to be publicly trusted.
	scrapeJob := map[string]any{
		"job_name":        "patroni",
		"scheme":          "https",
		"tls_config":      map[string]any{"insecure_skip_verify": true},
		"scrape_interval": "10s",
		"static_configs": []map[string]any{
			{"targets": []string{"0.0.0.0:8008"}},
		},
	}

	// Add Prometheus Receiver
	outConfig.Receivers[Prometheus] = map[string]any{
		"config": map[string]any{
			"scrape_configs": []map[string]any{scrapeJob},
		},
	}

	// Add Prometheus exporter
	outConfig.Exporters[Prometheus] = map[string]any{
		"endpoint": "0.0.0.0:8889",
	}

	// Add Metrics Pipeline
	outConfig.Pipelines[Metrics] = Pipeline{
		Receivers: []ComponentID{Prometheus},
		Exporters: []ComponentID{Prometheus},
	}
}
62 changes: 62 additions & 0 deletions internal/collector/pgbouncer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2024 - 2025 Crunchy Data Solutions, Inc.
//
// SPDX-License-Identifier: Apache-2.0

package collector

import (
"context"
_ "embed"
"encoding/json"
"fmt"
"slices"

"github.com/crunchydata/postgres-operator/internal/feature"
"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
)

// The contents of "pgbouncer_metrics_queries.yaml" as JSON.
// See: https://pkg.go.dev/embed
//
//go:embed "generated/pgbouncer_metrics_queries.json"
var pgBouncerMetricsQueries json.RawMessage

// NewConfigForPgBouncerPod creates a config for the OTel collector container
// that runs as a sidecar in the pgBouncer Pod
// NewConfigForPgBouncerPod creates a config for the OTel collector container
// that runs as a sidecar in the pgBouncer Pod. It returns nil when pgBouncer
// is not enabled in the cluster spec.
func NewConfigForPgBouncerPod(
	ctx context.Context, cluster *v1beta1.PostgresCluster, sqlQueryUsername string,
) *Config {
	if cluster.Spec.Proxy != nil && cluster.Spec.Proxy.PGBouncer != nil {
		config := NewConfig()
		EnablePgBouncerMetrics(ctx, config, sqlQueryUsername)
		return config
	}

	// pgBouncer is disabled; there is nothing to collect.
	return nil
}

// EnablePgBouncerMetrics adds a SQL Query receiver that reads pgMonitor
// metrics from pgBouncer, a Prometheus exporter, and a metrics pipeline
// joining the two. It does nothing unless the OpenTelemetryMetrics feature
// gate is enabled. The datasource reads PGPASSWORD from the container
// environment at runtime.
func EnablePgBouncerMetrics(ctx context.Context, config *Config, sqlQueryUsername string) {
	if !feature.Enabled(ctx, feature.OpenTelemetryMetrics) {
		return
	}

	datasource := fmt.Sprintf(
		`host=localhost dbname=pgbouncer port=5432 user=%s password=${env:PGPASSWORD}`,
		sqlQueryUsername)

	// Add SqlQuery Receiver. Clone the embedded queries so the shared
	// package-level value is never aliased or mutated.
	config.Receivers[SqlQuery] = map[string]any{
		"driver":     "postgres",
		"datasource": datasource,
		"queries":    slices.Clone(pgBouncerMetricsQueries),
	}

	// Add Prometheus exporter
	config.Exporters[Prometheus] = map[string]any{
		"endpoint": "0.0.0.0:8889",
	}

	// Add Metrics Pipeline
	config.Pipelines[Metrics] = Pipeline{
		Receivers: []ComponentID{SqlQuery},
		Exporters: []ComponentID{Prometheus},
	}
}
99 changes: 99 additions & 0 deletions internal/collector/pgbouncer_metrics_queries.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# This list of queries configures an OTel SQL Query Receiver to read pgMonitor
# metrics from PgBouncer.
#
# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries
# https://github.com/CrunchyData/pgmonitor/blob/v5.1.1/sql_exporter/common/crunchy_pgbouncer_121_collector.yml

- sql: "SHOW CLIENTS"
metrics:
- metric_name: ccp_pgbouncer_clients_wait_seconds
value_column: wait
attribute_columns: ["database", "user", "state", "application_name", "link"]
description: "Current waiting time in seconds"

- sql: "SHOW DATABASES"
metrics:
- metric_name: ccp_pgbouncer_databases_pool_size
value_column: pool_size
attribute_columns: ["name", "host", "port", "database", "force_user", "pool_mode"]
description: "Maximum number of server connections"

- metric_name: ccp_pgbouncer_databases_min_pool_size
value_column: min_pool_size
attribute_columns: ["name", "host", "port", "database", "force_user", "pool_mode"]
description: "Minimum number of server connections"

- metric_name: ccp_pgbouncer_databases_reserve_pool
value_column: reserve_pool
attribute_columns: ["name", "host", "port", "database", "force_user", "pool_mode"]
description: "Maximum number of additional connections for this database"

- metric_name: ccp_pgbouncer_databases_max_connections
value_column: max_connections
attribute_columns: ["name", "host", "port", "database", "force_user", "pool_mode"]
description: >-
Maximum number of allowed connections for this database,
as set by max_db_connections, either globally or per database

- metric_name: ccp_pgbouncer_databases_current_connections
value_column: current_connections
attribute_columns: ["name", "host", "port", "database", "force_user", "pool_mode"]
description: "Current number of connections for this database"

- metric_name: ccp_pgbouncer_databases_paused
value_column: paused
attribute_columns: ["name", "host", "port", "database", "force_user", "pool_mode"]
description: "1 if this database is currently paused, else 0"

- metric_name: ccp_pgbouncer_databases_disabled
value_column: disabled
attribute_columns: ["name", "host", "port", "database", "force_user", "pool_mode"]
description: "1 if this database is currently disabled, else 0"

- sql: "SHOW LISTS"
metrics:
- metric_name: ccp_pgbouncer_lists_item_count
value_column: items
attribute_columns: ["list"]
description: "Count of items registered with pgBouncer"

- sql: "SHOW POOLS"
metrics:
- metric_name: ccp_pgbouncer_pools_client_active
value_column: cl_active
attribute_columns: ["database", "user"]
description: >-
Client connections that are either linked to server connections or
are idle with no queries waiting to be processed

- metric_name: ccp_pgbouncer_pools_client_waiting
value_column: cl_waiting
attribute_columns: ["database", "user"]
description: "Client connections that have sent queries but have not yet got a server connection"

- metric_name: ccp_pgbouncer_pools_server_active
value_column: sv_active
attribute_columns: ["database", "user"]
description: "Server connections that are linked to a client"

- metric_name: ccp_pgbouncer_pools_server_idle
value_column: sv_idle
attribute_columns: ["database", "user"]
description: "Server connections that are unused and immediately usable for client queries"

- metric_name: ccp_pgbouncer_pools_server_used
value_column: sv_used
attribute_columns: ["database", "user"]
description: >-
Server connections that have been idle for more than server_check_delay,
so they need server_check_query to run on them before they can be used again

- sql: "SHOW SERVERS"
metrics:
- metric_name: ccp_pgbouncer_servers_close_needed
value_column: close_needed
attribute_columns: ["database", "user", "state", "application_name", "link"]
description: >-
1 if the connection will be closed as soon as possible,
because a configuration file reload or DNS update changed the connection information
or RECONNECT was issued
40 changes: 3 additions & 37 deletions internal/collector/postgres.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,47 +7,13 @@ package collector
import (
"context"

"github.com/crunchydata/postgres-operator/internal/feature"
"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
)

func NewConfigForPostgresPod(ctx context.Context) *Config {
func NewConfigForPostgresPod(ctx context.Context, inCluster *v1beta1.PostgresCluster) *Config {
config := NewConfig()

if feature.Enabled(ctx, feature.OpenTelemetryMetrics) {
// Add Prometheus exporter
config.Exporters[Prometheus] = map[string]any{
"endpoint": "0.0.0.0:8889",
}

// Add Prometheus Receiver
config.Receivers[Prometheus] = map[string]any{
"config": map[string]any{
"scrape_configs": []map[string]any{
{
"job_name": "patroni",
"scheme": "https",
"tls_config": map[string]any{
"insecure_skip_verify": true,
},
"scrape_interval": "10s",
"static_configs": []map[string]any{
{
"targets": []string{
"0.0.0.0:8008",
},
},
},
},
},
},
}

// Add Metrics Pipeline
config.Pipelines[Metrics] = Pipeline{
Receivers: []ComponentID{Prometheus},
Exporters: []ComponentID{Prometheus},
}
}
EnablePatroniMetrics(ctx, inCluster, config)

return config
}
2 changes: 1 addition & 1 deletion internal/controller/postgrescluster/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ func (r *Reconciler) Reconcile(
pgbackrest.PostgreSQL(cluster, &pgParameters, backupsSpecFound)
pgmonitor.PostgreSQLParameters(cluster, &pgParameters)

otelConfig := collector.NewConfigForPostgresPod(ctx)
otelConfig := collector.NewConfigForPostgresPod(ctx, cluster)

// Set huge_pages = try if a hugepages resource limit > 0, otherwise set "off"
postgres.SetHugePages(cluster, &pgParameters)
Expand Down
2 changes: 1 addition & 1 deletion internal/controller/postgrescluster/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -1201,7 +1201,7 @@ func (r *Reconciler) reconcileInstance(
}

if err == nil && feature.Enabled(ctx, feature.OpenTelemetryMetrics) {
collector.AddToPod(ctx, cluster, instanceConfigMap, &instance.Spec.Template.Spec, nil)
collector.AddToPod(ctx, cluster, instanceConfigMap, &instance.Spec.Template.Spec, nil, "")
}

// Add pgMonitor resources to the instance Pod spec
Expand Down
Loading