@@ -76,11 +76,23 @@ const TICK_TIMEOUT: std::time::Duration = std::time::Duration::from_millis(1100)
7676/// Maximum number of known block hashes to keep for a peer.
7777const MAX_KNOWN_BLOCKS : usize = 1024 ; // ~32kb per peer + LruHashSet overhead
7878
79- /// If the block announces stream to peer has been inactive for two minutes meaning local node
79+ /// If the block announces stream to peer has been inactive for 30 seconds meaning local node
8080/// has not sent or received block announcements to/from the peer, report the node for inactivity,
8181/// disconnect it and attempt to establish connection to some other peer.
8282const INACTIVITY_EVICT_THRESHOLD : Duration = Duration :: from_secs ( 30 ) ;
8383
84+ /// When `SyncingEngine` is started, wait two minutes before actually staring to count peers as
85+ /// evicted.
86+ ///
87+ /// Parachain collator may incorrectly get evicted because it's waiting to receive a number of
88+ /// relaychain blocks before it can start creating parachain blocks. During this wait,
89+ /// `SyncingEngine` still counts it as active and as the peer is not sending blocks, it may get
90+ /// evicted if a block is not received within the first 30 secons since the peer connected.
91+ ///
92+ /// To prevent this from happening, define a threshold for how long `SyncingEngine` should wait
93+ /// before it starts evicting peers.
94+ const INITIAL_EVICTION_WAIT_PERIOD : Duration = Duration :: from_secs ( 2 * 60 ) ;
95+
8496mod rep {
8597 use sc_peerset:: ReputationChange as Rep ;
8698 /// Peer has different genesis.
@@ -243,6 +255,12 @@ pub struct SyncingEngine<B: BlockT, Client> {
243255
244256 /// Prometheus metrics.
245257 metrics : Option < Metrics > ,
258+
259+ /// When the syncing was started.
260+ ///
261+ /// Stored as an `Option<Instant>` so once the initial wait has passed, `SyncingEngine`
262+ /// can reset the peer timers and continue with the normal eviction process.
263+ syncing_started : Option < Instant > ,
246264}
247265
248266impl < B : BlockT , Client > SyncingEngine < B , Client >
@@ -389,6 +407,7 @@ where
389407 default_peers_set_num_light,
390408 event_streams : Vec :: new ( ) ,
391409 tick_timeout : Delay :: new ( TICK_TIMEOUT ) ,
410+ syncing_started : None ,
392411 metrics : if let Some ( r) = metrics_registry {
393412 match Metrics :: register ( r, is_major_syncing. clone ( ) ) {
394413 Ok ( metrics) => Some ( metrics) ,
@@ -607,6 +626,8 @@ where
607626 }
608627
609628 pub async fn run ( mut self ) {
629+ self . syncing_started = Some ( Instant :: now ( ) ) ;
630+
610631 loop {
611632 futures:: future:: poll_fn ( |cx| self . poll ( cx) ) . await ;
612633 }
@@ -619,6 +640,25 @@ where
619640
620641 while let Poll :: Ready ( ( ) ) = self . tick_timeout . poll_unpin ( cx) {
621642 self . report_metrics ( ) ;
643+ self . tick_timeout . reset ( TICK_TIMEOUT ) ;
644+
645+ // if `SyncingEngine` has just started, don't evict seemingly inactive peers right away
646+ // as they may not have produced blocks not because they've disconnected but because
647+ // they're still waiting to receive enough relaychain blocks to start producing blocks.
648+ if let Some ( started) = self . syncing_started {
649+ if started. elapsed ( ) < INITIAL_EVICTION_WAIT_PERIOD {
650+ continue
651+ }
652+
653+ // reset the peer activity timers so they don't expire right away after
654+ // the initial wait is done.
655+ for info in self . peers . values_mut ( ) {
656+ info. last_notification_received = Instant :: now ( ) ;
657+ info. last_notification_sent = Instant :: now ( ) ;
658+ }
659+
660+ self . syncing_started = None ;
661+ }
622662
623663 // go over all connected peers and check if any of them have been idle for a while. Idle
624664 // in this case means that we haven't sent or received block announcements to/from this
@@ -647,8 +687,6 @@ where
647687 self . evicted . insert ( * id) ;
648688 }
649689 }
650-
651- self . tick_timeout . reset ( TICK_TIMEOUT ) ;
652690 }
653691
654692 while let Poll :: Ready ( Some ( event) ) = self . service_rx . poll_next_unpin ( cx) {
0 commit comments