@@ -67,6 +67,7 @@ use std::{
6767 Arc ,
6868 } ,
6969 task:: Poll ,
70+ time:: { Duration , Instant } ,
7071} ;
7172
7273/// Interval at which we perform time based maintenance
@@ -75,12 +76,19 @@ const TICK_TIMEOUT: std::time::Duration = std::time::Duration::from_millis(1100)
7576/// Maximum number of known block hashes to keep for a peer.
7677const MAX_KNOWN_BLOCKS : usize = 1024 ; // ~32kb per peer + LruHashSet overhead
7778
79+ /// If the block announces stream to peer has been inactive for two minutes meaning local node
80+ /// has not sent or received block announcements to/from the peer, report the node for inactivity,
81+ /// disconnect it and attempt to establish connection to some other peer.
82+ const INACTIVITY_EVICT_THRESHOLD : Duration = Duration :: from_secs ( 30 ) ;
83+
7884mod rep {
7985 use sc_peerset:: ReputationChange as Rep ;
8086 /// Peer has different genesis.
8187 pub const GENESIS_MISMATCH : Rep = Rep :: new_fatal ( "Genesis mismatch" ) ;
8288 /// Peer send us a block announcement that failed at validation.
8389 pub const BAD_BLOCK_ANNOUNCEMENT : Rep = Rep :: new ( -( 1 << 12 ) , "Bad block announcement" ) ;
90+ /// Block announce substream with the peer has been inactive too long
91+ pub const INACTIVE_SUBSTREAM : Rep = Rep :: new ( -( 1 << 10 ) , "Inactive block announce substream" ) ;
8492}
8593
8694struct Metrics {
@@ -160,6 +168,10 @@ pub struct Peer<B: BlockT> {
160168 pub known_blocks : LruHashSet < B :: Hash > ,
161169 /// Notification sink.
162170 sink : NotificationsSink ,
171+ /// Instant when the last notification was sent to peer.
172+ last_notification_sent : Instant ,
173+ /// Instant when the last notification was received from peer.
174+ last_notification_received : Instant ,
163175}
164176
165177pub struct SyncingEngine < B : BlockT , Client > {
@@ -200,6 +212,9 @@ pub struct SyncingEngine<B: BlockT, Client> {
200212 /// All connected peers. Contains both full and light node peers.
201213 peers : HashMap < PeerId , Peer < B > > ,
202214
215+ /// Evicted peers
216+ evicted : HashSet < PeerId > ,
217+
203218 /// List of nodes for which we perform additional logging because they are important for the
204219 /// user.
205220 important_peers : HashSet < PeerId > ,
@@ -353,6 +368,7 @@ where
353368 chain_sync,
354369 network_service,
355370 peers : HashMap :: new ( ) ,
371+ evicted : HashSet :: new ( ) ,
356372 block_announce_data_cache : LruCache :: new ( cache_capacity) ,
357373 block_announce_protocol_name,
358374 num_connected : num_connected. clone ( ) ,
@@ -516,6 +532,7 @@ where
516532 } ,
517533 } ;
518534 peer. known_blocks . insert ( hash) ;
535+ peer. last_notification_received = Instant :: now ( ) ;
519536
520537 if peer. info . roles . is_full ( ) {
521538 let is_best = match announce. state . unwrap_or ( BlockState :: Best ) {
@@ -566,6 +583,7 @@ where
566583 data : Some ( data. clone ( ) ) ,
567584 } ;
568585
586+ peer. last_notification_sent = Instant :: now ( ) ;
569587 peer. sink . send_sync_notification ( message. encode ( ) ) ;
570588 }
571589 }
@@ -596,6 +614,35 @@ where
596614
597615 while let Poll :: Ready ( ( ) ) = self . tick_timeout . poll_unpin ( cx) {
598616 self . report_metrics ( ) ;
617+
618+ // go over all connected peers and check if any of them have been idle for a while. Idle
619+ // in this case means that we haven't sent or received block announcements to/from this
620+ // peer. If that is the case, because of #5685, it could be that the block announces
621+ // substream is not actually open and and this peer is just wasting a slot and is should
622+ // be replaced with some other node that is willing to send us block announcements.
623+ for ( id, peer) in self . peers . iter ( ) {
624+ // because of a delay between disconnecting a peer in `SyncingEngine` and getting
625+ // the response back from `Protocol`, a peer might be reported and disconnect
626+ // multiple times. To prevent this from happening (until the underlying issue is
627+ // fixed), keep track of evicted peers and report and disconnect them only once.
628+ if self . evicted . contains ( id) {
629+ continue
630+ }
631+
632+ let last_received_late =
633+ peer. last_notification_received . elapsed ( ) > INACTIVITY_EVICT_THRESHOLD ;
634+ let last_sent_late =
635+ peer. last_notification_sent . elapsed ( ) > INACTIVITY_EVICT_THRESHOLD ;
636+
637+ if last_received_late && last_sent_late {
638+ log:: debug!( target: "sync" , "evict peer {id} since it has been idling for too long" ) ;
639+ self . network_service . report_peer ( * id, rep:: INACTIVE_SUBSTREAM ) ;
640+ self . network_service
641+ . disconnect_peer ( * id, self . block_announce_protocol_name . clone ( ) ) ;
642+ self . evicted . insert ( * id) ;
643+ }
644+ }
645+
599646 self . tick_timeout . reset ( TICK_TIMEOUT ) ;
600647 }
601648
@@ -692,6 +739,7 @@ where
692739 } ,
693740 } ,
694741 sc_network:: SyncEvent :: NotificationStreamClosed { remote } => {
742+ self . evicted . remove ( & remote) ;
695743 if self . on_sync_peer_disconnected ( remote) . is_err ( ) {
696744 log:: trace!(
697745 target: "sync" ,
@@ -844,6 +892,8 @@ where
844892 NonZeroUsize :: new ( MAX_KNOWN_BLOCKS ) . expect ( "Constant is nonzero" ) ,
845893 ) ,
846894 sink,
895+ last_notification_sent : Instant :: now ( ) ,
896+ last_notification_received : Instant :: now ( ) ,
847897 } ;
848898
849899 let req = if peer. info . roles . is_full ( ) {
0 commit comments