Skip to content
This repository was archived by the owner on Nov 15, 2023. It is now read-only.

Commit e44b43e

Browse files
authored
Evict inactive peers from SyncingEngine (#13829)
* Evict inactive peers from `SyncingEngine` If both halves of the block announce notification stream have been inactive for 2 minutes, report the peer and disconnect it, allowing `SyncingEngine` to free up a slot for some other peer that hopefully is more active. This needs to be done because the node may falsely believe it has open connections to peers because the inbound substream can be closed without any notification and closed outbound substream is noticed only when node attempts to write to it which may not happen if the node has nothing to send. * zzz * wip * Evict peers only when timeout expires * Use `debug!()` --------- Co-authored-by: parity-processbot <>
1 parent 9f0d54a commit e44b43e

File tree

1 file changed

+50
-0
lines changed

1 file changed

+50
-0
lines changed

client/network/sync/src/engine.rs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ use std::{
6767
Arc,
6868
},
6969
task::Poll,
70+
time::{Duration, Instant},
7071
};
7172

7273
/// Interval at which we perform time based maintenance
@@ -75,12 +76,19 @@ const TICK_TIMEOUT: std::time::Duration = std::time::Duration::from_millis(1100)
7576
/// Maximum number of known block hashes to keep for a peer.
7677
const MAX_KNOWN_BLOCKS: usize = 1024; // ~32kb per peer + LruHashSet overhead
7778

79+
/// If the block announces stream to peer has been inactive for two minutes meaning local node
80+
/// has not sent or received block announcements to/from the peer, report the node for inactivity,
81+
/// disconnect it and attempt to establish connection to some other peer.
82+
const INACTIVITY_EVICT_THRESHOLD: Duration = Duration::from_secs(30);
83+
7884
mod rep {
7985
use sc_peerset::ReputationChange as Rep;
8086
/// Peer has different genesis.
8187
pub const GENESIS_MISMATCH: Rep = Rep::new_fatal("Genesis mismatch");
8288
/// Peer send us a block announcement that failed at validation.
8389
pub const BAD_BLOCK_ANNOUNCEMENT: Rep = Rep::new(-(1 << 12), "Bad block announcement");
90+
/// Block announce substream with the peer has been inactive too long
91+
pub const INACTIVE_SUBSTREAM: Rep = Rep::new(-(1 << 10), "Inactive block announce substream");
8492
}
8593

8694
struct Metrics {
@@ -160,6 +168,10 @@ pub struct Peer<B: BlockT> {
160168
pub known_blocks: LruHashSet<B::Hash>,
161169
/// Notification sink.
162170
sink: NotificationsSink,
171+
/// Instant when the last notification was sent to peer.
172+
last_notification_sent: Instant,
173+
/// Instant when the last notification was received from peer.
174+
last_notification_received: Instant,
163175
}
164176

165177
pub struct SyncingEngine<B: BlockT, Client> {
@@ -200,6 +212,9 @@ pub struct SyncingEngine<B: BlockT, Client> {
200212
/// All connected peers. Contains both full and light node peers.
201213
peers: HashMap<PeerId, Peer<B>>,
202214

215+
/// Evicted peers
216+
evicted: HashSet<PeerId>,
217+
203218
/// List of nodes for which we perform additional logging because they are important for the
204219
/// user.
205220
important_peers: HashSet<PeerId>,
@@ -353,6 +368,7 @@ where
353368
chain_sync,
354369
network_service,
355370
peers: HashMap::new(),
371+
evicted: HashSet::new(),
356372
block_announce_data_cache: LruCache::new(cache_capacity),
357373
block_announce_protocol_name,
358374
num_connected: num_connected.clone(),
@@ -516,6 +532,7 @@ where
516532
},
517533
};
518534
peer.known_blocks.insert(hash);
535+
peer.last_notification_received = Instant::now();
519536

520537
if peer.info.roles.is_full() {
521538
let is_best = match announce.state.unwrap_or(BlockState::Best) {
@@ -566,6 +583,7 @@ where
566583
data: Some(data.clone()),
567584
};
568585

586+
peer.last_notification_sent = Instant::now();
569587
peer.sink.send_sync_notification(message.encode());
570588
}
571589
}
@@ -596,6 +614,35 @@ where
596614

597615
while let Poll::Ready(()) = self.tick_timeout.poll_unpin(cx) {
598616
self.report_metrics();
617+
618+
// go over all connected peers and check if any of them have been idle for a while. Idle
619+
// in this case means that we haven't sent or received block announcements to/from this
620+
// peer. If that is the case, because of #5685, it could be that the block announces
621+
// substream is not actually open and and this peer is just wasting a slot and is should
622+
// be replaced with some other node that is willing to send us block announcements.
623+
for (id, peer) in self.peers.iter() {
624+
// because of a delay between disconnecting a peer in `SyncingEngine` and getting
625+
// the response back from `Protocol`, a peer might be reported and disconnect
626+
// multiple times. To prevent this from happening (until the underlying issue is
627+
// fixed), keep track of evicted peers and report and disconnect them only once.
628+
if self.evicted.contains(id) {
629+
continue
630+
}
631+
632+
let last_received_late =
633+
peer.last_notification_received.elapsed() > INACTIVITY_EVICT_THRESHOLD;
634+
let last_sent_late =
635+
peer.last_notification_sent.elapsed() > INACTIVITY_EVICT_THRESHOLD;
636+
637+
if last_received_late && last_sent_late {
638+
log::debug!(target: "sync", "evict peer {id} since it has been idling for too long");
639+
self.network_service.report_peer(*id, rep::INACTIVE_SUBSTREAM);
640+
self.network_service
641+
.disconnect_peer(*id, self.block_announce_protocol_name.clone());
642+
self.evicted.insert(*id);
643+
}
644+
}
645+
599646
self.tick_timeout.reset(TICK_TIMEOUT);
600647
}
601648

@@ -692,6 +739,7 @@ where
692739
},
693740
},
694741
sc_network::SyncEvent::NotificationStreamClosed { remote } => {
742+
self.evicted.remove(&remote);
695743
if self.on_sync_peer_disconnected(remote).is_err() {
696744
log::trace!(
697745
target: "sync",
@@ -844,6 +892,8 @@ where
844892
NonZeroUsize::new(MAX_KNOWN_BLOCKS).expect("Constant is nonzero"),
845893
),
846894
sink,
895+
last_notification_sent: Instant::now(),
896+
last_notification_received: Instant::now(),
847897
};
848898

849899
let req = if peer.info.roles.is_full() {

0 commit comments

Comments
 (0)