This repository was archived by the owner on Nov 15, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Block ActiveLeavesUpdate and BlockFinalized events in overseer until major sync is complete
#6689
Closed
Closed
Changes from 4 commits
Commits
Show all changes
19 commits
Select commit
Hold shift + click to select a range
ae29bb4
Add sync oracle to overseer
tdimitrov 70d031d
Don't send `ActiveLeavesUpdate` and `BlockFinalized` until full sync
tdimitrov 349c89f
Comments and indentation
tdimitrov 5bae570
Remove unnecessary `clone()`
tdimitrov fe09d12
Fix `BlockFinalized` handling in main loop
tdimitrov 4c4a4db
Pass `SyncOracle` as a parameter to `dummy_overseer_builder`
tdimitrov cfcd098
Some tests
tdimitrov 27b0cf5
Fix initial sync loop
tdimitrov 2204966
More tests
tdimitrov 2ba4a97
Update node/overseer/src/lib.rs
tdimitrov 969f71a
Extract initial ActiveLeaves update logic in `prepare_initial_active_…
tdimitrov 81a64e5
Update tests
tdimitrov 6bdb390
Better comments in tests
tdimitrov f8467d8
Remove unneeded async
tdimitrov a3840a7
Fix `dummy_overseer_builder` usage
tdimitrov ade3d9d
Remove `Option<Box<dyn SyncOracle + Send>>` from `MajorSyncOracle` an…
tdimitrov ba1167b
Remove code duplication
tdimitrov 809a612
Don't send `ActiveLeaves` on stratup; do it when the first fresh leaf…
tdimitrov 2e14f46
Undo some method extractions
tdimitrov File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -83,6 +83,7 @@ use polkadot_node_subsystem_types::messages::{ | |
| DisputeDistributionMessage, GossipSupportMessage, NetworkBridgeRxMessage, | ||
| NetworkBridgeTxMessage, ProvisionerMessage, RuntimeApiMessage, StatementDistributionMessage, | ||
| }; | ||
| use sp_consensus::SyncOracle; | ||
|
|
||
| pub use polkadot_node_subsystem_types::{ | ||
| errors::{SubsystemError, SubsystemResult}, | ||
|
|
@@ -330,6 +331,40 @@ pub async fn forward_events<P: BlockchainEvents<Block>>(client: Arc<P>, mut hand | |
| } | ||
| } | ||
|
|
||
| /// Used to detect if the node is in initial major sync. | ||
| /// It's worth mentioning that this is a one way check. Once the initial full sync is complete | ||
| /// `finished_syncing` will never return `false` again. The reason is that the struct is meant to be used | ||
| /// only during initialization. | ||
| pub struct MajorSyncOracle { | ||
| sync_oracle: Option<Box<dyn SyncOracle + Send>>, | ||
| } | ||
|
|
||
| impl MajorSyncOracle { | ||
| /// Create `MajorSyncOracle` from `SyncOracle` | ||
| pub fn new(sync_oracle: Box<dyn SyncOracle + Send>) -> Self { | ||
| Self { sync_oracle: Some(sync_oracle) } | ||
| } | ||
|
|
||
| /// Create dummy `MajorSyncOracle` which always returns true for `finished_syncing` | ||
| pub fn new_dummy() -> Self { | ||
| Self { sync_oracle: None } | ||
| } | ||
|
|
||
| /// Check if the node has finished the initial major sync | ||
| pub fn finished_syncing(&mut self) -> bool { | ||
| match &mut self.sync_oracle { | ||
| Some(sync_oracle) => | ||
| if !sync_oracle.is_major_syncing() { | ||
| self.sync_oracle = None; | ||
| true | ||
| } else { | ||
| false | ||
| }, | ||
| None => true, | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Create a new instance of the [`Overseer`] with a fixed set of [`Subsystem`]s. | ||
| /// | ||
| /// This returns the overseer along with an [`OverseerHandle`] which can | ||
|
|
@@ -629,6 +664,9 @@ pub struct Overseer<SupportsParachains> { | |
|
|
||
| /// Various Prometheus metrics. | ||
| pub metrics: OverseerMetrics, | ||
|
|
||
| /// SyncOracle is used to detect when initial full node sync is complete | ||
| pub sync_oracle: MajorSyncOracle, | ||
| } | ||
|
|
||
| /// Spawn the metrics metronome task. | ||
|
|
@@ -725,68 +763,159 @@ where | |
| let metrics = self.metrics.clone(); | ||
| spawn_metronome_metrics(&mut self, metrics)?; | ||
|
|
||
| // Notify about active leaves on startup before starting the loop | ||
| let initial_sync_finished = self.sync_oracle.finished_syncing(); | ||
| // Import the active leaves found in the database | ||
| for (hash, number) in std::mem::take(&mut self.leaves) { | ||
| let _ = self.active_leaves.insert(hash, number); | ||
| if let Some((span, status)) = self.on_head_activated(&hash, None).await { | ||
| let update = | ||
| ActiveLeavesUpdate::start_work(ActivatedLeaf { hash, number, status, span }); | ||
| self.broadcast_signal(OverseerSignal::ActiveLeaves(update)).await?; | ||
| if initial_sync_finished { | ||
| // Initial sync is complete. Notify the subsystems and proceed to the main loop | ||
| let update = ActiveLeavesUpdate::start_work(ActivatedLeaf { | ||
| hash, | ||
| number, | ||
| status, | ||
| span, | ||
| }); | ||
| self.broadcast_signal(OverseerSignal::ActiveLeaves(update)).await?; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| loop { | ||
| // If initial sync is not complete - wait for it. | ||
| // This loop is identical to the main one but doesn't generate `ActiveLeaves` and `BlockFinalized` events until | ||
| // the initial full sync is complete. | ||
| // This is an infinite loop which executes only when `!initial_sync_finished`. The weird syntax is only to save | ||
| // one extra layer of indentation because it's already a bit tough for the eyes. | ||
| // Think about it like: | ||
| // ``` | ||
| // if !initial_sync_finished { | ||
| // loop { | ||
| // select! { | ||
| // ... | ||
| // } | ||
| // } | ||
| // } | ||
| //``` | ||
tdimitrov marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| while !initial_sync_finished { | ||
| select! { | ||
| msg = self.events_rx.select_next_some() => { | ||
| match msg { | ||
| Event::MsgToSubsystem { msg, origin } => { | ||
| self.route_message(msg.into(), origin).await?; | ||
| self.metrics.on_message_relayed(); | ||
| } | ||
| Event::Stop => { | ||
| self.stop().await; | ||
| return Ok(()); | ||
| } | ||
| Event::BlockImported(block) => { | ||
| self.block_imported(block).await?; | ||
| } | ||
| Event::BlockFinalized(block) => { | ||
| self.block_finalized(block).await?; | ||
| } | ||
| Event::ExternalRequest(request) => { | ||
| self.handle_external_request(request); | ||
| } | ||
| Event::MsgToSubsystem { msg, origin } => self.handle_msg_to_subsystem(msg, origin).await?, | ||
| Event::Stop => return self.handle_stop().await, | ||
|
||
| Event::BlockImported(block) => _ = self.block_imported(block).await, | ||
| Event::BlockFinalized(block) if self.sync_oracle.finished_syncing() => { | ||
| // Initial sync is complete | ||
ordian marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| self.block_finalized(&block).await; | ||
| // Send initial `ActiveLeaves` | ||
| for (hash, number) in self.active_leaves.clone().into_iter() { | ||
ordian marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| let span = match self.span_per_active_leaf.get(&hash) { | ||
| Some(span) => span.clone(), | ||
| None => { | ||
| // This should never happen. Spans are generated in `on_head_activated` | ||
| // which is called from `block_imported`. Despite not sending a signal | ||
| // `BlockImported` events are handled so a span should exist for each | ||
| // active leaf. | ||
| gum::error!( | ||
| target: LOG_TARGET, | ||
| ?hash, | ||
| ?number, | ||
| "Span for active leaf not found. This is not expected" | ||
| ); | ||
| let span = Arc::new(jaeger::Span::new(hash, "leaf-activated")); | ||
| self.span_per_active_leaf.insert(hash, span.clone()); | ||
| span | ||
| } | ||
| }; | ||
|
|
||
| let update = ActiveLeavesUpdate::start_work(ActivatedLeaf { | ||
| hash, | ||
| number, | ||
| status: LeafStatus::Fresh, | ||
| span, | ||
| }); | ||
| self.broadcast_signal(OverseerSignal::ActiveLeaves(update)).await?; | ||
| } | ||
| // Send initial `BlockFinalized` | ||
| self.broadcast_signal(OverseerSignal::BlockFinalized(block.hash, block.number)).await?; | ||
| break | ||
| }, | ||
| Event::BlockFinalized(block) => _ = self.block_finalized(&block).await, | ||
| Event::ExternalRequest(request) => self.handle_external_request(request) | ||
| } | ||
| }, | ||
| msg = self.to_orchestra_rx.select_next_some() => { | ||
| msg = self.to_orchestra_rx.select_next_some() => self.handle_orchestra_rx(msg), | ||
| res = self.running_subsystems.select_next_some() => return self.handle_running_subsystems(res).await | ||
| } | ||
| } | ||
|
|
||
| // main loop | ||
| loop { | ||
| select! { | ||
| msg = self.events_rx.select_next_some() => { | ||
| match msg { | ||
| ToOrchestra::SpawnJob { name, subsystem, s } => { | ||
| self.spawn_job(name, subsystem, s); | ||
| } | ||
| ToOrchestra::SpawnBlockingJob { name, subsystem, s } => { | ||
| self.spawn_blocking_job(name, subsystem, s); | ||
| } | ||
| Event::MsgToSubsystem { msg, origin } => self.handle_msg_to_subsystem(msg, origin).await?, | ||
| Event::Stop => return self.handle_stop().await, | ||
| Event::BlockImported(block) => { | ||
| let update = self.block_imported(block).await; | ||
| if !update.is_empty() { | ||
| self.broadcast_signal(OverseerSignal::ActiveLeaves(update)).await?; | ||
| } | ||
| }, | ||
| Event::BlockFinalized(block) => _ = self.block_finalized(&block).await, | ||
| Event::ExternalRequest(request) => self.handle_external_request(request) | ||
| } | ||
| }, | ||
| res = self.running_subsystems.select_next_some() => { | ||
| gum::error!( | ||
| target: LOG_TARGET, | ||
| subsystem = ?res, | ||
| "subsystem finished unexpectedly", | ||
| ); | ||
| self.stop().await; | ||
| return res; | ||
| }, | ||
| msg = self.to_orchestra_rx.select_next_some() => self.handle_orchestra_rx(msg), | ||
| res = self.running_subsystems.select_next_some() => return self.handle_running_subsystems(res).await | ||
| } | ||
| } | ||
| } | ||
|
|
||
| async fn block_imported(&mut self, block: BlockInfo) -> SubsystemResult<()> { | ||
| async fn handle_stop(self) -> Result<(), SubsystemError> { | ||
| self.stop().await; | ||
| return Ok(()) | ||
tdimitrov marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| fn handle_orchestra_rx(&mut self, msg: ToOrchestra) { | ||
| match msg { | ||
| ToOrchestra::SpawnJob { name, subsystem, s } => { | ||
| self.spawn_job(name, subsystem, s); | ||
| }, | ||
| ToOrchestra::SpawnBlockingJob { name, subsystem, s } => { | ||
| self.spawn_blocking_job(name, subsystem, s); | ||
| }, | ||
| } | ||
| } | ||
|
|
||
| async fn handle_msg_to_subsystem( | ||
| &mut self, | ||
| msg: AllMessages, | ||
| origin: &'static str, | ||
| ) -> Result<(), SubsystemError> { | ||
| self.route_message(msg.into(), origin).await?; | ||
| self.metrics.on_message_relayed(); | ||
| Ok(()) | ||
| } | ||
|
|
||
| async fn handle_running_subsystems( | ||
| self, | ||
| res: Result<(), SubsystemError>, | ||
| ) -> Result<(), SubsystemError> { | ||
| gum::error!( | ||
| target: LOG_TARGET, | ||
| subsystem = ?res, | ||
| "subsystem finished unexpectedly", | ||
| ); | ||
| self.stop().await; | ||
| return res | ||
| } | ||
|
|
||
| async fn block_imported(&mut self, block: BlockInfo) -> ActiveLeavesUpdate { | ||
| match self.active_leaves.entry(block.hash) { | ||
| hash_map::Entry::Vacant(entry) => entry.insert(block.number), | ||
| hash_map::Entry::Occupied(entry) => { | ||
| debug_assert_eq!(*entry.get(), block.number); | ||
| return Ok(()) | ||
| return ActiveLeavesUpdate::default() | ||
| }, | ||
| }; | ||
|
|
||
|
|
@@ -808,13 +937,10 @@ where | |
|
|
||
| self.clean_up_external_listeners(); | ||
|
|
||
| if !update.is_empty() { | ||
| self.broadcast_signal(OverseerSignal::ActiveLeaves(update)).await?; | ||
| } | ||
| Ok(()) | ||
| update | ||
| } | ||
|
|
||
| async fn block_finalized(&mut self, block: BlockInfo) -> SubsystemResult<()> { | ||
| async fn block_finalized(&mut self, block: &BlockInfo) -> ActiveLeavesUpdate { | ||
| let mut update = ActiveLeavesUpdate::default(); | ||
|
|
||
| self.active_leaves.retain(|h, n| { | ||
|
|
@@ -832,17 +958,7 @@ where | |
| self.on_head_deactivated(deactivated) | ||
| } | ||
|
|
||
| self.broadcast_signal(OverseerSignal::BlockFinalized(block.hash, block.number)) | ||
| .await?; | ||
|
|
||
| // If there are no leaves being deactivated, we don't need to send an update. | ||
| // | ||
| // Our peers will be informed about our finalized block the next time we are activating/deactivating some leaf. | ||
| if !update.is_empty() { | ||
| self.broadcast_signal(OverseerSignal::ActiveLeaves(update)).await?; | ||
| } | ||
|
|
||
| Ok(()) | ||
| update | ||
| } | ||
|
|
||
| /// Handles a header activation. If the header's state doesn't support the parachains API, | ||
|
|
@@ -861,7 +977,7 @@ where | |
| gum::trace!( | ||
| target: LOG_TARGET, | ||
| relay_parent = ?hash, | ||
| "Leaf got activated, notifying exterinal listeners" | ||
| "Leaf got activated, notifying external listeners" | ||
| ); | ||
| for listener in listeners { | ||
| // it's fine if the listener is no longer interested | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.