diff --git a/autometrics-macros/src/lib.rs b/autometrics-macros/src/lib.rs index f2f6713a..567dc40a 100644 --- a/autometrics-macros/src/lib.rs +++ b/autometrics-macros/src/lib.rs @@ -10,6 +10,7 @@ mod parse; const COUNTER_NAME_PROMETHEUS: &str = "function_calls_count"; const HISTOGRAM_BUCKET_NAME_PROMETHEUS: &str = "function_calls_duration_bucket"; const GAUGE_NAME_PROMETHEUS: &str = "function_calls_concurrent"; +const ADD_BUILD_INFO_LABELS: &str = "* on (instance, job) group_left(version, commit) build_info"; const DEFAULT_PROMETHEUS_URL: &str = "http://localhost:9090"; @@ -126,7 +127,11 @@ fn instrument_function(args: &AutometricsArgs, item: ItemFn) -> Result String { } fn request_rate_query(counter_name: &str, label_key: &str, label_value: &str) -> String { - format!("sum by (function, module) (rate({counter_name}{{{label_key}=\"{label_value}\"}}[5m]))") + format!("sum by (function, module, commit, version) (rate({counter_name}{{{label_key}=\"{label_value}\"}}[5m]) {ADD_BUILD_INFO_LABELS})") } fn error_ratio_query(counter_name: &str, label_key: &str, label_value: &str) -> String { let request_rate = request_rate_query(counter_name, label_key, label_value); - format!("sum by (function, module) (rate({counter_name}{{{label_key}=\"{label_value}\",result=\"error\"}}[5m])) / + format!("sum by (function, module, commit, version) (rate({counter_name}{{{label_key}=\"{label_value}\",result=\"error\"}}[5m]) {ADD_BUILD_INFO_LABELS}) +/ {request_rate}", ) } fn latency_query(bucket_name: &str, label_key: &str, label_value: &str) -> String { let latency = format!( - "sum by (le, function, module) (rate({bucket_name}{{{label_key}=\"{label_value}\"}}[5m]))" + "sum by (le, function, module, commit, version) (rate({bucket_name}{{{label_key}=\"{label_value}\"}}[5m]) {ADD_BUILD_INFO_LABELS})" ); format!( "histogram_quantile(0.99, {latency}) or @@ -280,5 +286,5 @@ histogram_quantile(0.95, {latency})" } fn concurrent_calls_query(gauge_name: &str, label_key: &str, label_value: &str) 
-> String { - format!("sum by (function, module) {gauge_name}{{{label_key}=\"{label_value}\"}}") + format!("sum by (function, module, commit, version) ({gauge_name}{{{label_key}=\"{label_value}\"}} {ADD_BUILD_INFO_LABELS})") } diff --git a/autometrics/Cargo.toml b/autometrics/Cargo.toml index 945d164a..6af6658b 100644 --- a/autometrics/Cargo.toml +++ b/autometrics/Cargo.toml @@ -48,6 +48,7 @@ const_format = { version = "0.2", features = ["rust_1_51"], optional = true } [dev-dependencies] regex = "1.7" http = "0.2" +vergen = { version = "8.1", features = ["git", "gitcl"] } [package.metadata.docs.rs] all-features = true diff --git a/autometrics/README.md b/autometrics/README.md index 4af3e69a..9dcd3bf8 100644 --- a/autometrics/README.md +++ b/autometrics/README.md @@ -29,6 +29,7 @@ Here is a demo of jumping from function docs to live Prometheus charts: - ✨ [`#[autometrics]`](https://docs.rs/autometrics/latest/autometrics/attr.autometrics.html) macro instruments any function or `impl` block to track the most useful metrics - 💡 Writes Prometheus queries so you can understand the data generated without knowing PromQL - 🔗 Injects links to live Prometheus charts directly into each function's doc comments +- [🔍 Identify commits](#identifying-commits-that-introduced-problems) that introduced errors or increased latency - [🚨 Define alerts](#alerts--slos) using SLO best practices directly in your source code - [📊 Grafana dashboards](#dashboards) work out of the box to visualize the performance of instrumented functions & SLOs - [⚙️ Configurable](#metrics-libraries) metric collection library (`opentelemetry`, `prometheus`, or `metrics`) @@ -96,6 +97,33 @@ Autometrics uses existing metrics libraries (see [below](#metrics-libraries)) to If you are already using one of these to collect metrics, simply configure autometrics to use the same library and the metrics it produces will be exported alongside yours. 
You do not need to use the Prometheus exporter functions this library provides and you do not need a separate endpoint for autometrics' metrics. +## Identifying commits that introduced problems + +Autometrics makes it easy to identify whether a specific version or commit introduced errors or increased latency. + +It uses a separate metric (`build_info`) to track the version and, optionally, the Git commit of your service. It then writes queries that group metrics by the `version` and `commit` labels so you can spot correlations between them and potential issues. + +The `version` is collected from the `CARGO_PKG_VERSION` environment variable, which `cargo` sets by default. You can override this by setting the compile-time environment variable `AUTOMETRICS_VERSION`. The `build_info` metric follows the method outlined in [Exposing the software version to Prometheus](https://www.robustperception.io/exposing-the-software-version-to-prometheus/). + +To set the `commit`, you can either set the compile-time environment variable `AUTOMETRICS_COMMIT`, or have it set automatically using the [vergen](https://crates.io/crates/vergen) crate: + +```toml +# Cargo.toml + +[build-dependencies] +vergen = { version = "8.1", features = ["git", "gitoxide"] } +``` + +```rust +// build.rs +fn main() { + vergen::EmitBuilder::builder() + .git_sha(true) + .emit() + .expect("Unable to generate build info"); +} +``` + ## Dashboards Autometrics provides [Grafana dashboards](https://github.com/autometrics-dev/autometrics-shared#dashboards) that will work for any project instrumented with the library. 
diff --git a/autometrics/src/constants.rs b/autometrics/src/constants.rs index d8b24d4f..3d133a25 100644 --- a/autometrics/src/constants.rs +++ b/autometrics/src/constants.rs @@ -2,11 +2,14 @@ pub const COUNTER_NAME: &str = "function.calls.count"; pub const HISTOGRAM_NAME: &str = "function.calls.duration"; pub const GAUGE_NAME: &str = "function.calls.concurrent"; +pub const BUILD_INFO_NAME: &str = "build_info"; // Descriptions pub const COUNTER_DESCRIPTION: &str = "Autometrics counter for tracking function calls"; pub const HISTOGRAM_DESCRIPTION: &str = "Autometrics histogram for tracking function call duration"; pub const GAUGE_DESCRIPTION: &str = "Autometrics gauge for tracking concurrent function calls"; +pub const BUILD_INFO_DESCRIPTION: &str = + "Autometrics info metric for tracking software version and build details"; // Labels pub const FUNCTION_KEY: &'static str = "function"; @@ -18,3 +21,5 @@ pub const ERROR_KEY: &'static str = "error"; pub const OBJECTIVE_NAME: &'static str = "objective.name"; pub const OBJECTIVE_PERCENTILE: &'static str = "objective.percentile"; pub const OBJECTIVE_LATENCY_THRESHOLD: &'static str = "objective.latency_threshold"; +pub const VERSION_KEY: &'static str = "version"; +pub const COMMIT_KEY: &'static str = "commit"; diff --git a/autometrics/src/labels.rs b/autometrics/src/labels.rs index 1f6959be..56087d9d 100644 --- a/autometrics/src/labels.rs +++ b/autometrics/src/labels.rs @@ -4,6 +4,22 @@ use std::ops::Deref; pub(crate) type Label = (&'static str, &'static str); type ResultAndReturnTypeLabels = (&'static str, Option<&'static str>); +/// These are the labels used for the `build_info` metric. +pub struct BuildInfoLabels { + pub(crate) version: &'static str, + pub(crate) commit: &'static str, +} + +impl BuildInfoLabels { + pub fn new(version: &'static str, commit: &'static str) -> Self { + Self { version, commit } + } + + pub fn to_vec(&self) -> Vec