Skip to content
This repository was archived by the owner on Nov 15, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1790,7 +1790,7 @@
"steppedLine": false,
"targets": [
{
"expr": "avg(increase(${metric_namespace}_sub_libp2p_connections_closed_total{instance=~\"${nodename}\"}[$__interval])) by (reason)",
"expr": "avg(sum(rate(${metric_namespace}_sub_libp2p_connections_closed_total{instance=~\"${nodename}\"}[$__interval])) by (instance, reason)) by (reason)",
"interval": "",
"legendFormat": "{{reason}}",
"refId": "A"
Expand Down Expand Up @@ -2719,4 +2719,4 @@
"list": []
},
"version": 103
}
}
65 changes: 44 additions & 21 deletions client/network/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,8 @@ struct Metrics {
// This list is ordered alphabetically
connections_closed_total: CounterVec<U64>,
connections_opened_total: CounterVec<U64>,
distinct_peers_connections_closed_total: CounterVec<U64>,
distinct_peers_connections_opened_total: CounterVec<U64>,
import_queue_blocks_submitted: Counter<U64>,
import_queue_finality_proofs_submitted: Counter<U64>,
import_queue_justifications_submitted: Counter<U64>,
Expand Down Expand Up @@ -889,14 +891,28 @@ impl Metrics {
connections_closed_total: register(CounterVec::new(
Opts::new(
"sub_libp2p_connections_closed_total",
"Total number of connections closed, by reason and direction"
"Total number of connections closed, by direction and reason"
),
&["direction", "reason"]
)?, registry)?,
connections_opened_total: register(CounterVec::new(
Opts::new(
"sub_libp2p_connections_opened_total",
"Total number of connections opened"
"Total number of connections opened by direction"
),
&["direction"]
)?, registry)?,
distinct_peers_connections_closed_total: register(CounterVec::new(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
distinct_peers_connections_closed_total: register(CounterVec::new(
distinct_peers_connections_closed_total: register(Counter::new(

Opts::new(
"sub_libp2p_distinct_peers_connections_closed_total",
"Total number of connections closed with distinct peers, by direction and reason"
),
&["direction", "reason"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
&["direction", "reason"]

I didn't think about it, but I don't think it makes sense to have the direction or reason on these metrics.

It would be misleading to report a single direction while we might have multiple connections with multiple directions.

Similarly, the reason here is the reason for closing the last connection, which to me makes the last connection special while it shouldn't be.
For instance, if a severe protocol error happens on the "main" connection, we immediately close it and also close all the other connections to that same peer afterwards. The reason reported here would be the one of the other connection (if any), which doesn't really make sense.

)?, registry)?,
distinct_peers_connections_opened_total: register(CounterVec::new(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same idea: remove the direction.

Suggested change
distinct_peers_connections_opened_total: register(CounterVec::new(
distinct_peers_connections_opened_total: register(Counter::new(

Opts::new(
"sub_libp2p_distinct_peers_connections_opened_total",
"Total number of connections opened with distinct peers by direction"
),
&["direction"]
)?, registry)?,
Expand Down Expand Up @@ -1214,40 +1230,47 @@ impl<B: BlockT + 'static, H: ExHashT> Future for NetworkWorker<B, H> {
}
this.event_streams.send(ev);
},
Poll::Ready(SwarmEvent::ConnectionEstablished { peer_id, endpoint, .. }) => {
Poll::Ready(SwarmEvent::ConnectionEstablished { peer_id, endpoint, num_established }) => {
trace!(target: "sub-libp2p", "Libp2p => Connected({:?})", peer_id);

if let Some(metrics) = this.metrics.as_ref() {
match endpoint {
ConnectedPoint::Dialer { .. } =>
metrics.connections_opened_total.with_label_values(&["out"]).inc(),
ConnectedPoint::Listener { .. } =>
metrics.connections_opened_total.with_label_values(&["in"]).inc(),
let direction = match endpoint {
ConnectedPoint::Dialer { .. } => "out",
ConnectedPoint::Listener { .. } => "in",
};

metrics.connections_opened_total.with_label_values(&[direction]).inc();

if num_established.get() == 1 {
metrics.distinct_peers_connections_opened_total.with_label_values(&[direction]).inc();
}
}
},
Poll::Ready(SwarmEvent::ConnectionClosed { peer_id, cause, endpoint, .. }) => {
Poll::Ready(SwarmEvent::ConnectionClosed { peer_id, cause, endpoint, num_established }) => {
trace!(target: "sub-libp2p", "Libp2p => Disconnected({:?}, {:?})", peer_id, cause);
if let Some(metrics) = this.metrics.as_ref() {
let dir = match endpoint {
let direction = match endpoint {
ConnectedPoint::Dialer { .. } => "out",
ConnectedPoint::Listener { .. } => "in",
};

match cause {
ConnectionError::IO(_) =>
metrics.connections_closed_total.with_label_values(&[dir, "transport-error"]).inc(),
let reason = match cause {
ConnectionError::IO(_) => "transport-error",
ConnectionError::Handler(NodeHandlerWrapperError::Handler(EitherError::A(EitherError::A(
EitherError::A(EitherError::A(EitherError::B(
EitherError::A(PingFailure::Timeout)))))))) =>
metrics.connections_closed_total.with_label_values(&[dir, "ping-timeout"]).inc(),
EitherError::A(PingFailure::Timeout)))))))) => "ping-timeout",
ConnectionError::Handler(NodeHandlerWrapperError::Handler(EitherError::A(EitherError::A(
EitherError::A(EitherError::A(EitherError::A(
EitherError::B(LegacyConnectionKillError)))))))) =>
metrics.connections_closed_total.with_label_values(&[dir, "force-closed"]).inc(),
ConnectionError::Handler(NodeHandlerWrapperError::Handler(_)) =>
metrics.connections_closed_total.with_label_values(&[dir, "protocol-error"]).inc(),
ConnectionError::Handler(NodeHandlerWrapperError::KeepAliveTimeout) =>
metrics.connections_closed_total.with_label_values(&[dir, "keep-alive-timeout"]).inc(),
EitherError::B(LegacyConnectionKillError)))))))) => "force-closed",
ConnectionError::Handler(NodeHandlerWrapperError::Handler(_)) => "protocol-error",
ConnectionError::Handler(NodeHandlerWrapperError::KeepAliveTimeout) => "keep-alive-timeout",
};

metrics.connections_closed_total.with_label_values(&[direction, reason]).inc();

// `num_established` represents the number of *remaining* connections.
if num_established == 0 {
metrics.distinct_peers_connections_closed_total.with_label_values(&[direction, reason]).inc();
}
}
},
Expand Down