From 6f2e0e919ea4418273f7a5400d57cb90a3bb987d Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 4 Feb 2021 10:05:07 +0100 Subject: [PATCH 01/60] WIP --- .../availability-distribution/src/lib.rs | 1195 +---------------- primitives/src/v0.rs | 3 +- 2 files changed, 67 insertions(+), 1131 deletions(-) diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index a35be6a3a219..9cd1f81337ab 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -1,4 +1,5 @@ -// Copyright 2020 Parity Technologies (UK) Ltd. + +// Copyright 2021 Parity Technologies (UK) Ltd. // This file is part of Polkadot. // Polkadot is free software: you can redistribute it and/or modify @@ -14,885 +15,91 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -//! The availability distribution -//! -//! Transforms `AvailableData` into erasure chunks, which are distributed to peers -//! who are interested in the relevant candidates. -//! Gossip messages received from other peers are verified and gossiped to interested -//! peers. Verified in this context means, the erasure chunks contained merkle proof -//! is checked. - -#![deny(unused_crate_dependencies, unused_qualifications)] - -use parity_scale_codec::{Decode, Encode}; -use futures::{channel::oneshot, FutureExt, TryFutureExt}; - -use sp_core::crypto::Public; -use sp_keystore::{CryptoStore, SyncCryptoStorePtr}; - -use polkadot_erasure_coding::branch_hash; -use polkadot_node_network_protocol::{ - v1 as protocol_v1, PeerId, ReputationChange as Rep, View, OurView, -}; -use polkadot_node_subsystem_util::metrics::{self, prometheus}; -use polkadot_primitives::v1::{ - BlakeTwo256, CoreState, ErasureChunk, Hash, HashT, - SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, CandidateHash, - CandidateDescriptor, -}; -use polkadot_subsystem::messages::{ - AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage, - NetworkBridgeMessage, RuntimeApiMessage, RuntimeApiRequest, NetworkBridgeEvent -}; -use polkadot_subsystem::{ - jaeger, errors::{ChainApiError, RuntimeApiError}, PerLeafSpan, - ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, -}; -use std::collections::{HashMap, HashSet}; -use std::collections::hash_map::Entry; -use std::iter; -use thiserror::Error; - -#[cfg(test)] -mod tests; - -const LOG_TARGET: &'static str = "availability_distribution"; - -#[derive(Debug, Error)] -enum Error { - #[error("Response channel to obtain StoreChunk failed")] - StoreChunkResponseChannel(#[source] oneshot::Canceled), - - #[error("Response channel to obtain QueryChunk failed")] - QueryChunkResponseChannel(#[source] oneshot::Canceled), - - #[error("Response channel to obtain QueryAncestors failed")] - QueryAncestorsResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain QueryAncestors failed")] - QueryAncestors(#[source] ChainApiError), - - #[error("Response channel to obtain QuerySession failed")] - QuerySessionResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain QuerySession failed")] - QuerySession(#[source] RuntimeApiError), - - #[error("Response channel to obtain QueryValidators failed")] - QueryValidatorsResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain QueryValidators failed")] - QueryValidators(#[source] RuntimeApiError), - - 
#[error("Response channel to obtain AvailabilityCores failed")] - AvailabilityCoresResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain AvailabilityCores failed")] - AvailabilityCores(#[source] RuntimeApiError), - - #[error("Response channel to obtain AvailabilityCores failed")] - QueryAvailabilityResponseChannel(#[source] oneshot::Canceled), - - #[error("Receive channel closed")] - IncomingMessageChannel(#[source] SubsystemError), -} - -type Result = std::result::Result; - -const COST_MERKLE_PROOF_INVALID: Rep = Rep::new(-100, "Merkle proof was invalid"); -const COST_NOT_A_LIVE_CANDIDATE: Rep = Rep::new(-51, "Candidate is not live"); -const COST_PEER_DUPLICATE_MESSAGE: Rep = Rep::new(-500, "Peer sent identical messages"); -const BENEFIT_VALID_MESSAGE_FIRST: Rep = Rep::new(15, "Valid message with new information"); -const BENEFIT_VALID_MESSAGE: Rep = Rep::new(10, "Valid message"); - -/// Checked signed availability bitfield that is distributed -/// to other peers. -#[derive(Encode, Decode, Debug, Clone, PartialEq, Eq, Hash)] -pub struct AvailabilityGossipMessage { - /// Anchor hash of the candidate the `ErasureChunk` is associated to. - pub candidate_hash: CandidateHash, - /// The erasure chunk, a encoded information part of `AvailabilityData`. - pub erasure_chunk: ErasureChunk, -} - -impl From for protocol_v1::AvailabilityDistributionMessage { - fn from(message: AvailabilityGossipMessage) -> Self { - Self::Chunk(message.candidate_hash, message.erasure_chunk) - } -} - -/// Data used to track information of peers and relay parents the -/// overseer ordered us to work on. -#[derive(Debug, Default)] -struct ProtocolState { - /// Track all active peers and their views - /// to determine what is relevant to them. - peer_views: HashMap, - - /// Our own view. - view: OurView, - - /// Caches a mapping of relay parents or ancestor to live candidate hashes. - /// Allows fast intersection of live candidates with views and consecutive unioning. - /// Maps relay parent / ancestor -> candidate hashes. - live_under: HashMap>, - - /// Track things needed to start and stop work on a particular relay parent. - per_relay_parent: HashMap, - - /// Track data that is specific to a candidate. - per_candidate: HashMap, +/// The bitfield distribution subsystem. +pub struct AvailabilityDistributionSubsystem { + /// Pointer to a keystore, which is required for determining this nodes validator index. + keystore: SyncCryptoStorePtr, + /// Prometheus metrics. + metrics: Metrics, } +/// Metadata about a candidate that is part of the live_candidates set. +/// +/// Those which were not present in a cache are "fresh" and have their candidate descriptor attached. This +/// information is propagated to the higher level where it can be used to create data entries. Cached candidates +/// already have entries associated with them, and thus don't need this metadata to be fetched. #[derive(Debug)] -struct PerCandidate { - /// A Candidate and a set of known erasure chunks in form of messages to be gossiped / distributed if the peer view wants that. - /// This is _across_ peers and not specific to a particular one. - /// candidate hash + erasure chunk index -> gossip message - message_vault: HashMap, - - /// Track received erasure chunk indices per peer. - received_messages: HashMap>, - - /// Track sent erasure chunk indices per peer. - sent_messages: HashMap>, - - /// The set of validators. - validators: Vec, - - /// If this node is a validator, note the index in the validator set. 
- validator_index: Option, - - /// The descriptor of this candidate. - descriptor: CandidateDescriptor, - - /// The set of relay chain blocks this appears to be live in. - live_in: HashSet, - - /// A Jaeger span relating to this candidate. - span: jaeger::JaegerSpan, -} - -impl PerCandidate { - /// Returns `true` iff the given `validator_index` is required by the given `peer`. - fn message_required_by_peer(&self, peer: &PeerId, validator_index: ValidatorIndex) -> bool { - self.received_messages.get(peer).map(|v| !v.contains(&validator_index)).unwrap_or(true) - && self.sent_messages.get(peer).map(|v| !v.contains(&validator_index)).unwrap_or(true) - } - - /// Add a chunk to the message vault. Overwrites anything that was already present. - fn add_message(&mut self, chunk_index: u32, message: AvailabilityGossipMessage) { - let _ = self.message_vault.insert(chunk_index, message); - } - - /// Clean up the span if we've got our own chunk. - fn drop_span_after_own_availability(&mut self) { - if let Some(validator_index) = self.validator_index { - if self.message_vault.contains_key(&validator_index) { - self.span = jaeger::JaegerSpan::Disabled; - } - } - } +enum FetchedLiveCandidate { + Cached, + Fresh(CandidateDescriptor), } -#[derive(Debug)] -struct PerRelayParent { - /// Set of `K` ancestors for this relay parent. - ancestors: Vec, - /// Live candidates, according to this relay parent. - live_candidates: HashSet, - /// The span that belongs to this relay parent. - span: PerLeafSpan, -} +struct ProtocolState { + /// Candidates we need to fetch our chunk for. + chunks_to_fetch: HashMap, -impl ProtocolState { - /// Unionize all live candidate hashes of the given relay parents and their recent - /// ancestors. + /// Localized information about sessions we are currently interested in. /// - /// Ignores all non existent relay parents, so this can be used directly with a peers view. - /// Returns a set of candidate hashes. - #[tracing::instrument(level = "trace", skip(relay_parents), fields(subsystem = LOG_TARGET))] - fn cached_live_candidates_unioned<'a>( - &'a self, - relay_parents: impl IntoIterator + 'a, - ) -> HashSet { - cached_live_candidates_unioned( - &self.per_relay_parent, - relay_parents - ) - } - - #[tracing::instrument(level = "trace", skip(candidates, span), fields(subsystem = LOG_TARGET))] - fn add_relay_parent( - &mut self, - relay_parent: Hash, - validators: Vec, - validator_index: Option, - candidates: HashMap, - ancestors: Vec, - span: PerLeafSpan, - ) { - let per_relay_parent = self.per_relay_parent.entry(relay_parent).or_insert_with(|| PerRelayParent { - span, - ancestors, - live_candidates: candidates.keys().cloned().collect(), - }); - - // register the relation of relay_parent to candidate.. - for (receipt_hash, fetched) in candidates { - let candidate_entry = match self.per_candidate.entry(receipt_hash) { - Entry::Occupied(e) => e.into_mut(), - Entry::Vacant(e) => { - if let FetchedLiveCandidate::Fresh(descriptor) = fetched { - e.insert(PerCandidate { - message_vault: HashMap::new(), - received_messages: HashMap::new(), - sent_messages: HashMap::new(), - validators: validators.clone(), - validator_index, - descriptor, - live_in: HashSet::new(), - span: if validator_index.is_some() { - jaeger::candidate_hash_span(&receipt_hash, "pending-availability") - } else { - jaeger::JaegerSpan::Disabled - }, - }) - } else { - tracing::warn!(target: LOG_TARGET, "No `per_candidate` but not fresh. 
logic error"); - continue; - } - } - }; + /// This is usually the current one and at session boundaries also the last one. + session_infos: HashMap, - // Create some span that will make it able to switch between the candidate and relay parent span. - let mut span = per_relay_parent.span.child("live-candidate"); - span.add_string_tag("candidate-hash", &format!("{:?}", receipt_hash)); - - candidate_entry.span.add_follows_from(&span); - candidate_entry.live_in.insert(relay_parent); - } - } - - #[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))] - fn remove_relay_parent(&mut self, relay_parent: &Hash) { - if let Some(per_relay_parent) = self.per_relay_parent.remove(relay_parent) { - for candidate_hash in per_relay_parent.live_candidates { - // Prune the candidate if this was the last member of our view - // to consider it live (including its ancestors). - if let Entry::Occupied(mut occ) = self.per_candidate.entry(candidate_hash) { - occ.get_mut().live_in.remove(relay_parent); - if occ.get().live_in.is_empty() { - occ.remove(); - } - } - } - } - } - - /// Removes all entries from live_under which aren't referenced in the ancestry of - /// one of our live relay-chain heads. - fn clean_up_live_under_cache(&mut self) { - let extended_view: HashSet<_> = self.per_relay_parent.iter() - .map(|(r_hash, v)| v.ancestors.iter().cloned().chain(iter::once(*r_hash))) - .flatten() - .collect(); - - self.live_under.retain(|ancestor_hash, _| extended_view.contains(ancestor_hash)); - } } -fn cached_live_candidates_unioned<'a>( - per_relay_parent: &'a HashMap, - relay_parents: impl IntoIterator + 'a, -) -> HashSet { - relay_parents - .into_iter() - .filter_map(|r| per_relay_parent.get(r)) - .map(|per_relay_parent| per_relay_parent.live_candidates.iter().cloned()) - .flatten() - .collect() -} - -/// Deal with network bridge updates and track what needs to be tracked -/// which depends on the message type received. -#[tracing::instrument(level = "trace", skip(ctx, keystore, metrics), fields(subsystem = LOG_TARGET))] -async fn handle_network_msg( - ctx: &mut Context, - keystore: &SyncCryptoStorePtr, - state: &mut ProtocolState, - metrics: &Metrics, - bridge_message: NetworkBridgeEvent, -) -> Result<()> -where - Context: SubsystemContext, -{ - match bridge_message { - NetworkBridgeEvent::PeerConnected(peerid, _role) => { - // insert if none already present - state.peer_views.entry(peerid).or_default(); - } - NetworkBridgeEvent::PeerDisconnected(peerid) => { - // get rid of superfluous data - state.peer_views.remove(&peerid); - } - NetworkBridgeEvent::PeerViewChange(peerid, view) => { - handle_peer_view_change(ctx, state, peerid, view, metrics).await; - } - NetworkBridgeEvent::OurViewChange(view) => { - handle_our_view_change(ctx, keystore, state, view, metrics).await?; - } - NetworkBridgeEvent::PeerMessage(remote, msg) => { - let gossiped_availability = match msg { - protocol_v1::AvailabilityDistributionMessage::Chunk(candidate_hash, chunk) => { - AvailabilityGossipMessage { - candidate_hash, - erasure_chunk: chunk, - } - } - }; - - process_incoming_peer_message(ctx, state, remote, gossiped_availability, metrics) - .await?; - } - } - Ok(()) -} - -/// Handle the changes necessary when our view changes. 
-#[tracing::instrument(level = "trace", skip(ctx, keystore, metrics), fields(subsystem = LOG_TARGET))] -async fn handle_our_view_change( - ctx: &mut Context, - keystore: &SyncCryptoStorePtr, - state: &mut ProtocolState, - view: OurView, - metrics: &Metrics, -) -> Result<()> -where - Context: SubsystemContext, -{ - let _timer = metrics.time_handle_our_view_change(); - - let old_view = std::mem::replace(&mut state.view, view); - - // needed due to borrow rules - let view = state.view.clone(); - - // add all the relay parents and fill the cache - for (added, span) in view.span_per_head().iter().filter(|v| !old_view.contains(&v.0)) { - let span = PerLeafSpan::new(span.clone(), "availability-distribution"); - - let validators = query_validators(ctx, *added).await?; - let validator_index = obtain_our_validator_index(&validators, keystore.clone()).await; - let (candidates, ancestors) - = query_live_candidates(ctx, &mut state.live_under, *added).await?; - - state.add_relay_parent( - *added, - validators, - validator_index, - candidates, - ancestors, - span, - ); - } - - // handle all candidates - let mut messages_out = Vec::new(); - for candidate_hash in state.cached_live_candidates_unioned(view.difference(&old_view)) { - // If we are not a validator for this candidate, let's skip it. - match state.per_candidate.get(&candidate_hash) { - None => continue, - Some(c) if c.validator_index.is_none() => continue, - Some(_) => {}, - }; - - // check if the availability is present in the store exists - if !query_data_availability(ctx, candidate_hash).await? { - continue; - } - - // obtain interested peers in the candidate hash - let peers: Vec = state - .peer_views - .clone() - .into_iter() - .filter(|(_peer, view)| { - // collect all direct interests of a peer w/o ancestors - state - .cached_live_candidates_unioned(view.heads.iter()) - .contains(&candidate_hash) - }) - .map(|(peer, _view)| peer.clone()) - .collect(); - - let per_candidate = state.per_candidate.get_mut(&candidate_hash) - .expect("existence checked above; qed"); - - let validator_count = per_candidate.validators.len(); - - // distribute all erasure messages to interested peers - for chunk_index in 0u32..(validator_count as u32) { - let _span = { - let mut span = per_candidate.span.child("load-and-distribute"); - span.add_string_tag("chunk-index", &format!("{}", chunk_index)); - span - }; - let message = if let Some(message) = per_candidate.message_vault.get(&chunk_index) { - tracing::trace!( - target: LOG_TARGET, - %chunk_index, - ?candidate_hash, - "Retrieved chunk from message vault", - ); - message.clone() - } else if let Some(erasure_chunk) = query_chunk(ctx, candidate_hash, chunk_index as ValidatorIndex).await? 
{ - tracing::trace!( - target: LOG_TARGET, - %chunk_index, - ?candidate_hash, - "Retrieved chunk from availability storage", - ); - - let msg = AvailabilityGossipMessage { - candidate_hash, - erasure_chunk, - }; - - per_candidate.add_message(chunk_index, msg.clone()); - - msg - } else { - tracing::error!( - target: LOG_TARGET, - %chunk_index, - ?candidate_hash, - "Availability store reported that we have the availability data, but we could not retrieve a chunk of it!", - ); - continue; - }; - - debug_assert_eq!(message.erasure_chunk.index, chunk_index); - - let peers = peers - .iter() - .filter(|peer| per_candidate.message_required_by_peer(peer, chunk_index)) - .cloned() - .collect::>(); - - add_tracked_messages_to_batch(&mut messages_out, per_candidate, metrics, peers, iter::once(message)); - } - - // traces are better if we wait until the loop is done to drop. - per_candidate.drop_span_after_own_availability(); - } - - // send all batched messages out. - send_batch_to_network(ctx, messages_out).await; - - // cleanup the removed relay parents and their states - old_view.difference(&view).for_each(|r| state.remove_relay_parent(r)); - state.clean_up_live_under_cache(); - - Ok(()) -} - -// After this function is invoked, the state reflects the messages as having been sent to a peer. -#[tracing::instrument(level = "trace", skip(batch, metrics, message_iter), fields(subsystem = LOG_TARGET))] -fn add_tracked_messages_to_batch( - batch: &mut Vec<(Vec, protocol_v1::ValidationProtocol)>, - per_candidate: &mut PerCandidate, - metrics: &Metrics, - peers: Vec, - message_iter: impl IntoIterator, -) { - for message in message_iter { - for peer in peers.iter() { - per_candidate - .sent_messages - .entry(peer.clone()) - .or_default() - .insert(message.erasure_chunk.index); - } - - if !peers.is_empty() { - batch.push(( - peers.clone(), - protocol_v1::ValidationProtocol::AvailabilityDistribution(message.into()), - )); - - metrics.on_chunk_distributed(); - } - } -} - -async fn send_batch_to_network( - ctx: &mut impl SubsystemContext, - batch: Vec<(Vec, protocol_v1::ValidationProtocol)>, -) { - if !batch.is_empty() { - ctx.send_message(NetworkBridgeMessage::SendValidationMessages(batch).into()).await - } +/// Localized session information, tailored for the needs of availability distribution. +struct SessionInfo { + /// For each core we maintain a randomized list of corresponding validators. + /// + /// This is so we can query them for chunks, trying them in order. As each validator will + /// have a randomized ordering, we should get good load balancing. + validator_groups: Vec>, } -// Send the difference between two views which were not sent -// to that particular peer. -#[tracing::instrument(level = "trace", skip(ctx, metrics), fields(subsystem = LOG_TARGET))] -async fn handle_peer_view_change( - ctx: &mut Context, - state: &mut ProtocolState, - origin: PeerId, - view: View, - metrics: &Metrics, -) -where - Context: SubsystemContext, -{ - let current = state.peer_views.entry(origin.clone()).or_default(); - - let added: Vec = view.difference(&*current).cloned().collect(); - - *current = view; - - if added.is_empty() { - return - } - - // only contains the intersection of what we are interested and - // the union of all relay parent's candidates. - let added_candidates = state.cached_live_candidates_unioned(added.iter()); - - // Send all messages we've seen before and the peer is now interested in. 
- let mut batch = Vec::new(); - for candidate_hash in added_candidates { - let per_candidate = match state.per_candidate.get_mut(&candidate_hash) { - Some(p) => p, - None => continue, - }; - - // obtain the relevant chunk indices not sent yet - let messages = ((0 as ValidatorIndex)..(per_candidate.validators.len() as ValidatorIndex)) - .into_iter() - .filter_map(|erasure_chunk_index: ValidatorIndex| { - // try to pick up the message from the message vault - // so we send as much as we have - per_candidate - .message_vault - .get(&erasure_chunk_index) - .filter(|_| per_candidate.message_required_by_peer(&origin, erasure_chunk_index)) - }) - .cloned() - .collect::>(); - - add_tracked_messages_to_batch(&mut batch, per_candidate, metrics, vec![origin.clone()], messages); - } - - send_batch_to_network(ctx, batch).await; +struct ChunkFetchingInfo { + descriptor: CandidateDescriptor, + /// Validators that backed the candidate and hopefully have our chunk. + backing_group: Vec, } -/// Obtain the first key which has a signing key. -/// Returns the index within the validator set as `ValidatorIndex`, if there exists one, -/// otherwise, `None` is returned. -async fn obtain_our_validator_index( - validators: &[ValidatorId], - keystore: SyncCryptoStorePtr, -) -> Option { - for (idx, validator) in validators.iter().enumerate() { - if CryptoStore::has_keys( - &*keystore, - &[(validator.to_raw_vec(), PARACHAIN_KEY_TYPE_ID)], - ) - .await - { - return Some(idx as ValidatorIndex); - } - } - None +fn run() { + /// Get current heads + /// For each chunk/slot, update randomized list of validators to query on session bundaries. + /// Fetch pending availability candidates and add them to `chunks_to_fetch`. } -/// Handle an incoming message from a peer. -#[tracing::instrument(level = "trace", skip(ctx, metrics), fields(subsystem = LOG_TARGET))] -async fn process_incoming_peer_message( +/// Obtain all live candidates under a particular relay head. This implicitly includes +/// `K` ancestors of the head, such that the candidates pending availability in all of +/// the states of the head and the ancestors are unioned together to produce the +/// return type of this function. Each candidate hash is paired with information about +/// from where it was fetched. +/// +/// This also updates all `live_under` cached by the protocol state and returns a list +/// of up to `K` ancestors of the relay-parent. 
+#[tracing::instrument(level = "trace", skip(ctx, live_under), fields(subsystem = LOG_TARGET))] +async fn query_live_candidates( ctx: &mut Context, - state: &mut ProtocolState, - origin: PeerId, - message: AvailabilityGossipMessage, - metrics: &Metrics, -) -> Result<()> + live_under: &mut HashMap>, + relay_parent: Hash, +) -> Result<(HashMap, Vec)> where Context: SubsystemContext, { - let _timer = metrics.time_process_incoming_peer_message(); - - // obtain the set of candidates we are interested in based on our current view - let live_candidates = state.cached_live_candidates_unioned(state.view.heads.iter()); - - // check if the candidate is of interest - let candidate_entry = if live_candidates.contains(&message.candidate_hash) { - state.per_candidate - .get_mut(&message.candidate_hash) - .expect("All live candidates are contained in per_candidate; qed") - } else { - tracing::trace!( - target: LOG_TARGET, - candidate_hash = ?message.candidate_hash, - peer = %origin, - "Peer send not live candidate", - ); - modify_reputation(ctx, origin, COST_NOT_A_LIVE_CANDIDATE).await; - return Ok(()) - }; - - // Handle a duplicate before doing expensive checks. - if let Some(existing) = candidate_entry.message_vault.get(&message.erasure_chunk.index) { - let span = candidate_entry.span.child("handle-duplicate"); - // check if this particular erasure chunk was already sent by that peer before - { - let _span = span.child("check-entry"); - let received_set = candidate_entry - .received_messages - .entry(origin.clone()) - .or_default(); - - if !received_set.insert(message.erasure_chunk.index) { - modify_reputation(ctx, origin, COST_PEER_DUPLICATE_MESSAGE).await; - return Ok(()); - } - } - - // check that the message content matches what we have already before rewarding - // the peer. - { - let _span = span.child("check-accurate"); - if existing == &message { - modify_reputation(ctx, origin, BENEFIT_VALID_MESSAGE).await; - } else { - modify_reputation(ctx, origin, COST_MERKLE_PROOF_INVALID).await; - } - } - - return Ok(()); - } - - let span = { - let mut span = candidate_entry.span.child("process-new-chunk"); - span.add_string_tag("peer-id", &origin.to_base58()); - span - }; - - // check the merkle proof against the erasure root in the candidate descriptor. - let anticipated_hash = { - let _span = span.child("check-merkle-root"); - match branch_hash( - &candidate_entry.descriptor.erasure_root, - &message.erasure_chunk.proof, - message.erasure_chunk.index as usize, - ) { - Ok(hash) => hash, - Err(e) => { - tracing::trace!( - target: LOG_TARGET, - candidate_hash = ?message.candidate_hash, - peer = %origin, - error = ?e, - "Failed to calculate chunk merkle proof", - ); - modify_reputation(ctx, origin, COST_MERKLE_PROOF_INVALID).await; - return Ok(()); - }, - } - }; - - { - let _span = span.child("check-chunk-hash"); - let erasure_chunk_hash = BlakeTwo256::hash(&message.erasure_chunk.chunk); - if anticipated_hash != erasure_chunk_hash { - tracing::trace!( - target: LOG_TARGET, - candidate_hash = ?message.candidate_hash, - peer = %origin, - "Peer sent chunk with invalid merkle proof", - ); - modify_reputation(ctx, origin, COST_MERKLE_PROOF_INVALID).await; - return Ok(()); - } - } - - { - // insert into known messages and change reputation. we've guaranteed - // above that the message vault doesn't contain any message under this - // chunk index already. 
- - candidate_entry - .received_messages - .entry(origin.clone()) - .or_default() - .insert(message.erasure_chunk.index); - - modify_reputation(ctx, origin, BENEFIT_VALID_MESSAGE_FIRST).await; - - // save the chunk for our index - if Some(message.erasure_chunk.index) == candidate_entry.validator_index { - let _span = span.child("store-our-chunk"); - if store_chunk( - ctx, - message.candidate_hash, - candidate_entry.descriptor.relay_parent, - message.erasure_chunk.index, - message.erasure_chunk.clone(), - ).await?.is_err() { - tracing::warn!( - target: LOG_TARGET, - "Failed to store erasure chunk to availability store" - ); - } - } - - candidate_entry.add_message(message.erasure_chunk.index, message.clone()); - candidate_entry.drop_span_after_own_availability(); - } - - // condense the peers to the peers with interest on the candidate - let peers = { - let _span = span.child("determine-recipient-peers"); - let per_relay_parent = &state.per_relay_parent; - - state - .peer_views - .clone() - .into_iter() - .filter(|(_, view)| { - // peers view must contain the candidate hash too - cached_live_candidates_unioned( - per_relay_parent, - view.heads.iter(), - ).contains(&message.candidate_hash) - }) - .map(|(peer, _)| -> PeerId { peer.clone() }) - .filter(|peer| candidate_entry.message_required_by_peer(peer, message.erasure_chunk.index)) - .collect::>() - }; - - drop(span); - // gossip that message to interested peers - let mut batch = Vec::new(); - add_tracked_messages_to_batch(&mut batch, candidate_entry, metrics, peers, iter::once(message)); - send_batch_to_network(ctx, batch).await; - - Ok(()) -} - -/// The bitfield distribution subsystem. -pub struct AvailabilityDistributionSubsystem { - /// Pointer to a keystore, which is required for determining this nodes validator index. - keystore: SyncCryptoStorePtr, - /// Prometheus metrics. - metrics: Metrics, -} - -impl AvailabilityDistributionSubsystem { - /// Number of ancestors to keep around for the relay-chain heads. - const K: usize = 3; - - /// Create a new instance of the availability distribution. - pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { - Self { keystore, metrics } - } - - /// Start processing work as passed on from the Overseer. - async fn run(self, ctx: Context) -> Result<()> - where - Context: SubsystemContext, - { - let mut state = ProtocolState { - peer_views: HashMap::new(), - view: Default::default(), - live_under: HashMap::new(), - per_relay_parent: HashMap::new(), - per_candidate: HashMap::new(), - }; - - self.run_inner(ctx, &mut state).await - } - - /// Start processing work. - #[tracing::instrument(skip(self, ctx), fields(subsystem = LOG_TARGET))] - async fn run_inner(self, mut ctx: Context, state: &mut ProtocolState) -> Result<()> - where - Context: SubsystemContext, - { - // work: process incoming messages from the overseer. 
- loop { - let message = ctx - .recv() - .await - .map_err(|e| Error::IncomingMessageChannel(e))?; - match message { - FromOverseer::Communication { - msg: AvailabilityDistributionMessage::NetworkBridgeUpdateV1(event), - } => { - if let Err(e) = handle_network_msg( - &mut ctx, - &self.keystore.clone(), - state, - &self.metrics, - event, - ) - .await - { - tracing::warn!( - target: LOG_TARGET, - err = ?e, - "Failed to handle incoming network messages", - ); - } - } - FromOverseer::Communication { - msg: AvailabilityDistributionMessage::AvailabilityFetchingRequest(_), - } => { - // TODO: Implement issue 2306: - tracing::warn!( - target: LOG_TARGET, - "To be implemented, see: https://github.com/paritytech/polkadot/issues/2306 !", - ); - } - FromOverseer::Signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate { - activated: _, - deactivated: _, - })) => { - // handled at view change - } - FromOverseer::Signal(OverseerSignal::BlockFinalized(..)) => {} - FromOverseer::Signal(OverseerSignal::Conclude) => { - return Ok(()); - } - } - } - } -} - -impl Subsystem for AvailabilityDistributionSubsystem -where - Context: SubsystemContext + Sync + Send, -{ - fn start(self, ctx: Context) -> SpawnedSubsystem { - let future = self - .run(ctx) - .map_err(|e| SubsystemError::with_origin("availability-distribution", e)) - .boxed(); + // register one of relay parents (not the ancestors) + let ancestors = query_up_to_k_ancestors_in_same_session( + ctx, + relay_parent, + AvailabilityDistributionSubsystem::K, + ) + .await?; - SpawnedSubsystem { - name: "availability-distribution-subsystem", - future, - } - } -} + // query the ones that were not present in the live_under cache and add them + // to it. + let live_candidates = query_pending_availability_at( + ctx, + ancestors.iter().cloned().chain(iter::once(relay_parent)), + live_under, + ).await?; -/// Metadata about a candidate that is part of the live_candidates set. -/// -/// Those which were not present in a cache are "fresh" and have their candidate descriptor attached. This -/// information is propagated to the higher level where it can be used to create data entries. Cached candidates -/// already have entries associated with them, and thus don't need this metadata to be fetched. -#[derive(Debug)] -enum FetchedLiveCandidate { - Cached, - Fresh(CandidateDescriptor), + Ok((live_candidates, ancestors)) } /// Obtain all live candidates for all given `relay_blocks`. @@ -937,42 +144,6 @@ where Ok(live_candidates) } -/// Obtain all live candidates under a particular relay head. This implicitly includes -/// `K` ancestors of the head, such that the candidates pending availability in all of -/// the states of the head and the ancestors are unioned together to produce the -/// return type of this function. Each candidate hash is paired with information about -/// from where it was fetched. -/// -/// This also updates all `live_under` cached by the protocol state and returns a list -/// of up to `K` ancestors of the relay-parent. 
-#[tracing::instrument(level = "trace", skip(ctx, live_under), fields(subsystem = LOG_TARGET))] -async fn query_live_candidates( - ctx: &mut Context, - live_under: &mut HashMap>, - relay_parent: Hash, -) -> Result<(HashMap, Vec)> -where - Context: SubsystemContext, -{ - // register one of relay parents (not the ancestors) - let ancestors = query_up_to_k_ancestors_in_same_session( - ctx, - relay_parent, - AvailabilityDistributionSubsystem::K, - ) - .await?; - - // query the ones that were not present in the live_under cache and add them - // to it. - let live_candidates = query_pending_availability_at( - ctx, - ancestors.iter().cloned().chain(iter::once(relay_parent)), - live_under, - ).await?; - - Ok((live_candidates, ancestors)) -} - /// Query all hashes and descriptors of candidates pending availability at a particular block. #[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] async fn query_pending_availability(ctx: &mut Context, relay_parent: Hash) @@ -1000,239 +171,3 @@ where }) .collect()) } - -/// Modify the reputation of a peer based on its behavior. -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn modify_reputation(ctx: &mut Context, peer: PeerId, rep: Rep) -where - Context: SubsystemContext, -{ - tracing::trace!( - target: LOG_TARGET, - rep = ?rep, - peer_id = ?peer, - "Reputation change for peer", - ); - ctx.send_message(AllMessages::NetworkBridge( - NetworkBridgeMessage::ReportPeer(peer, rep), - )).await; -} - -/// Query the proof of validity for a particular candidate hash. -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_data_availability(ctx: &mut Context, candidate_hash: CandidateHash) -> Result -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - ctx.send_message(AllMessages::AvailabilityStore( - AvailabilityStoreMessage::QueryDataAvailability(candidate_hash, tx), - )).await; - - rx.await.map_err(|e| Error::QueryAvailabilityResponseChannel(e)) -} - -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_chunk( - ctx: &mut Context, - candidate_hash: CandidateHash, - validator_index: ValidatorIndex, -) -> Result> -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - ctx.send_message(AllMessages::AvailabilityStore( - AvailabilityStoreMessage::QueryChunk(candidate_hash, validator_index, tx), - )).await; - - rx.await.map_err(|e| Error::QueryChunkResponseChannel(e)) -} - -#[tracing::instrument(level = "trace", skip(ctx, erasure_chunk), fields(subsystem = LOG_TARGET))] -async fn store_chunk( - ctx: &mut Context, - candidate_hash: CandidateHash, - relay_parent: Hash, - validator_index: ValidatorIndex, - erasure_chunk: ErasureChunk, -) -> Result> -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - ctx.send_message(AllMessages::AvailabilityStore( - AvailabilityStoreMessage::StoreChunk { - candidate_hash, - relay_parent, - chunk: erasure_chunk, - tx, - } - )).await; - - rx.await.map_err(|e| Error::StoreChunkResponseChannel(e)) -} - -/// Query the validator set. 
-#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_validators( - ctx: &mut Context, - relay_parent: Hash, -) -> Result> -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - let query_validators = AllMessages::RuntimeApi(RuntimeApiMessage::Request( - relay_parent, - RuntimeApiRequest::Validators(tx), - )); - - ctx.send_message(query_validators) - .await; - rx.await - .map_err(|e| Error::QueryValidatorsResponseChannel(e))? - .map_err(|e| Error::QueryValidators(e)) -} - -/// Query the hash of the `K` ancestors -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_k_ancestors( - ctx: &mut Context, - relay_parent: Hash, - k: usize, -) -> Result> -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - let query_ancestors = AllMessages::ChainApi(ChainApiMessage::Ancestors { - hash: relay_parent, - k, - response_channel: tx, - }); - - ctx.send_message(query_ancestors) - .await; - rx.await - .map_err(|e| Error::QueryAncestorsResponseChannel(e))? - .map_err(|e| Error::QueryAncestors(e)) -} - -/// Query the session index of a relay parent -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_session_index_for_child( - ctx: &mut Context, - relay_parent: Hash, -) -> Result -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - let query_session_idx_for_child = AllMessages::RuntimeApi(RuntimeApiMessage::Request( - relay_parent, - RuntimeApiRequest::SessionIndexForChild(tx), - )); - - ctx.send_message(query_session_idx_for_child) - .await; - rx.await - .map_err(|e| Error::QuerySessionResponseChannel(e))? - .map_err(|e| Error::QuerySession(e)) -} - -/// Queries up to k ancestors with the constraints of equiv session -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_up_to_k_ancestors_in_same_session( - ctx: &mut Context, - relay_parent: Hash, - k: usize, -) -> Result> -where - Context: SubsystemContext, -{ - // k + 1 since we always query the child's session index - // ordering is [parent, grandparent, greatgrandparent, greatgreatgrandparent, ...] - let ancestors = query_k_ancestors(ctx, relay_parent, k + 1).await?; - let desired_session = query_session_index_for_child(ctx, relay_parent).await?; - // we would only need `ancestors.len() - 1`, but the one extra could avoid a re-alloc - // if the consumer wants to push the `relay_parent` onto it too and does not hurt otherwise - let mut acc = Vec::with_capacity(ancestors.len()); - - // iterate from youngest to oldest - let mut iter = ancestors.into_iter().peekable(); - - while let Some((ancestor, ancestor_parent)) = iter.next().and_then(|a| iter.peek().map(|ap| (a, ap))) { - if query_session_index_for_child(ctx, *ancestor_parent).await? != desired_session { - break; - } - acc.push(ancestor); - } - - debug_assert!(acc.len() <= k); - Ok(acc) -} - -#[derive(Clone)] -struct MetricsInner { - gossipped_availability_chunks: prometheus::Counter, - handle_our_view_change: prometheus::Histogram, - process_incoming_peer_message: prometheus::Histogram, -} - -/// Availability Distribution metrics. -#[derive(Default, Clone)] -pub struct Metrics(Option); - -impl Metrics { - fn on_chunk_distributed(&self) { - if let Some(metrics) = &self.0 { - metrics.gossipped_availability_chunks.inc(); - } - } - - /// Provide a timer for `handle_our_view_change` which observes on drop. 
- fn time_handle_our_view_change(&self) -> Option { - self.0.as_ref().map(|metrics| metrics.handle_our_view_change.start_timer()) - } - - /// Provide a timer for `process_incoming_peer_message` which observes on drop. - fn time_process_incoming_peer_message(&self) -> Option { - self.0.as_ref().map(|metrics| metrics.process_incoming_peer_message.start_timer()) - } -} - -impl metrics::Metrics for Metrics { - fn try_register( - registry: &prometheus::Registry, - ) -> std::result::Result { - let metrics = MetricsInner { - gossipped_availability_chunks: prometheus::register( - prometheus::Counter::new( - "parachain_gossipped_availability_chunks_total", - "Number of availability chunks gossipped to other peers.", - )?, - registry, - )?, - handle_our_view_change: prometheus::register( - prometheus::Histogram::with_opts( - prometheus::HistogramOpts::new( - "parachain_availability_distribution_handle_our_view_change", - "Time spent within `availability_distribution::handle_our_view_change`", - ) - )?, - registry, - )?, - process_incoming_peer_message: prometheus::register( - prometheus::Histogram::with_opts( - prometheus::HistogramOpts::new( - "parachain_availability_distribution_process_incoming_peer_message", - "Time spent within `availability_distribution::process_incoming_peer_message`", - ) - )?, - registry, - )?, - }; - Ok(Metrics(Some(metrics))) - } -} diff --git a/primitives/src/v0.rs b/primitives/src/v0.rs index d806e4a93c71..951624566172 100644 --- a/primitives/src/v0.rs +++ b/primitives/src/v0.rs @@ -114,7 +114,8 @@ impl MallocSizeOf for ValidatorId { } /// Index of the validator is used as a lightweight replacement of the `ValidatorId` when appropriate. -pub type ValidatorIndex = u32; +#[derive(Debug, Eq, Ord, PartialEq, PartialOrd, Hash)] +pub struct ValidatorIndex(u32); application_crypto::with_pair! { /// A Parachain validator keypair. From da850f7c07ab0824b5f8a2e6b98f4fa18e468ab4 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 9 Feb 2021 20:19:33 +0100 Subject: [PATCH 02/60] availability distribution, still very wip. Work on the requesting side of things. --- .../availability-distribution/src/error.rs | 58 +++++ .../src/fetch_task.rs | 102 +++++++++ .../availability-distribution/src/lib.rs | 201 ++++++------------ .../availability-distribution/src/state.rs | 173 +++++++++++++++ 4 files changed, 393 insertions(+), 141 deletions(-) create mode 100644 node/network/availability-distribution/src/error.rs create mode 100644 node/network/availability-distribution/src/fetch_task.rs create mode 100644 node/network/availability-distribution/src/state.rs diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs new file mode 100644 index 000000000000..f70b2876a5ad --- /dev/null +++ b/node/network/availability-distribution/src/error.rs @@ -0,0 +1,58 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. 
If not, see . + +#[derive(Debug, Error)] +enum Error { + #[error("Response channel to obtain StoreChunk failed")] + StoreChunkResponseChannel(#[source] oneshot::Canceled), + + #[error("Response channel to obtain QueryChunk failed")] + QueryChunkResponseChannel(#[source] oneshot::Canceled), + + #[error("Response channel to obtain QueryAncestors failed")] + QueryAncestorsResponseChannel(#[source] oneshot::Canceled), + #[error("RuntimeAPI to obtain QueryAncestors failed")] + QueryAncestors(#[source] ChainApiError), + + #[error("Response channel to obtain QuerySession failed")] + QuerySessionResponseChannel(#[source] oneshot::Canceled), + #[error("RuntimeAPI to obtain QuerySession failed")] + QuerySession(#[source] RuntimeApiError), + + #[error("Response channel to obtain QueryValidators failed")] + QueryValidatorsResponseChannel(#[source] oneshot::Canceled), + #[error("RuntimeAPI to obtain QueryValidators failed")] + QueryValidators(#[source] RuntimeApiError), + + #[error("Response channel to obtain AvailabilityCores failed")] + AvailabilityCoresResponseChannel(#[source] oneshot::Canceled), + #[error("RuntimeAPI to obtain AvailabilityCores failed")] + AvailabilityCores(#[source] RuntimeApiError), + + #[error("Response channel to obtain AvailabilityCores failed")] + QueryAvailabilityResponseChannel(#[source] oneshot::Canceled), + + #[error("Receive channel closed")] + IncomingMessageChannel(#[source] SubsystemError), +} + +type Result = std::result::Result; + +impl From for Error { + fn from(err: SubsystemError) -> Self { + Self::IncomingMessageChannel(err) + } +} diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs new file mode 100644 index 000000000000..8b583cb68f95 --- /dev/null +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -0,0 +1,102 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +struct FetchTask { + /// For what relay parents this task is relevant. + /// + /// In other words, for which relay chain parents this candidate is considered live. + /// This is updated on every `ActiveLeavesUpdate` and enables us to know when we can safely + /// stop keeping track of that candidate/chunk. + live_in: HashSet, + + /// The relay parent providing the context for the candidate. + relay_parent: Hash, + + /// Some details about the to be fetched candidate. + descriptor: CandidateDescriptor, + + /// We keep the task around in state `Fetched` until `live_in` becomes empty, to make + /// sure we won't re-fetch an already fetched candidate. + state: FetchedState, +} + +/// State of a particular candidate chunk fetching process. +enum FetchedState { + /// Chunk is currently being fetched. + Fetching, + /// Chunk has already been fetched successfully. + Fetched, + /// All relevant live_in have been removed, before we were able to get our chunk. 
+ Canceled, +} + +impl FetchTask { + /// Start fetching a chunk. + pub async fn start(ctx: &mut Context, leaf: Hash, core: OccupiedCore) -> Self { + } + + /// Add the given leaf to the relay parents which are making this task relevant. + pub fn add_leaf(&mut self, leaf: Hash) { + self.live_in.insert(leaf); + } + + /// Remove leaves and cancel the task, if it was the last one and the task has still been + /// fetching. + pub fn remove_leaves(&mut self, leaves: HashSet) { + self.live_in.difference(leaves); + if self.live_in.is_empty() { + // TODO: Make sure, to actually cancel the task. + self.state = FetchedState::Canceled + } + } + + /// Whether or not this task can be considered finished. + /// + /// That is, it is either canceled or succeeded fetching the chunk. + pub fn is_finished(&self) -> bool { + match state { + FetchedState::Fetched | FetchedState::Canceled => true, + FetchedState::Fetching => false, + } + } + + /// Retrieve the relay parent providing the context for this candidate. + pub fn get_relay_parent(&self) -> Hash { + self.relay_parent + } +} + +/// Query the session index of a relay parent +#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] +async fn query_session_index_for_child( + ctx: &mut Context, + relay_parent: Hash, +) -> Result +where + Context: SubsystemContext, +{ + let (tx, rx) = oneshot::channel(); + let query_session_idx_for_child = AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionIndexForChild(tx), + )); + + ctx.send_message(query_session_idx_for_child) + .await; + rx.await + .map_err(|e| Error::QuerySessionResponseChannel(e))? + .map_err(|e| Error::QuerySession(e)) +} diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 9cd1f81337ab..47115bece0ea 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -1,4 +1,3 @@ - // Copyright 2021 Parity Technologies (UK) Ltd. // This file is part of Polkadot. @@ -15,159 +14,79 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -/// The bitfield distribution subsystem. -pub struct AvailabilityDistributionSubsystem { - /// Pointer to a keystore, which is required for determining this nodes validator index. - keystore: SyncCryptoStorePtr, - /// Prometheus metrics. - metrics: Metrics, -} -/// Metadata about a candidate that is part of the live_candidates set. -/// -/// Those which were not present in a cache are "fresh" and have their candidate descriptor attached. This -/// information is propagated to the higher level where it can be used to create data entries. Cached candidates -/// already have entries associated with them, and thus don't need this metadata to be fetched. -#[derive(Debug)] -enum FetchedLiveCandidate { - Cached, - Fresh(CandidateDescriptor), -} +/// Error and [`Result`] type for this subsystem. +mod error; +pub use error::Error; +use error::Result; -struct ProtocolState { - /// Candidates we need to fetch our chunk for. - chunks_to_fetch: HashMap, +/// The actual implementation of running availability distribution. +mod state; +/// State of a running availability-distribution subsystem. +use state::ProtocolState; - /// Localized information about sessions we are currently interested in. - /// - /// This is usually the current one and at session boundaries also the last one. 
- session_infos: HashMap, +/// A task fetching a particular chunk. +mod fetch_task; -} +const LOG_TARGET: &'static str = "availability_distribution"; -/// Localized session information, tailored for the needs of availability distribution. -struct SessionInfo { - /// For each core we maintain a randomized list of corresponding validators. - /// - /// This is so we can query them for chunks, trying them in order. As each validator will - /// have a randomized ordering, we should get good load balancing. - validator_groups: Vec>, -} -struct ChunkFetchingInfo { - descriptor: CandidateDescriptor, - /// Validators that backed the candidate and hopefully have our chunk. - backing_group: Vec, -} - -fn run() { - /// Get current heads - /// For each chunk/slot, update randomized list of validators to query on session bundaries. - /// Fetch pending availability candidates and add them to `chunks_to_fetch`. +/// The bitfield distribution subsystem. +pub struct AvailabilityDistributionSubsystem { + /// Pointer to a keystore, which is required for determining this nodes validator index. + keystore: SyncCryptoStorePtr, + /// Prometheus metrics. + metrics: Metrics, } -/// Obtain all live candidates under a particular relay head. This implicitly includes -/// `K` ancestors of the head, such that the candidates pending availability in all of -/// the states of the head and the ancestors are unioned together to produce the -/// return type of this function. Each candidate hash is paired with information about -/// from where it was fetched. -/// -/// This also updates all `live_under` cached by the protocol state and returns a list -/// of up to `K` ancestors of the relay-parent. -#[tracing::instrument(level = "trace", skip(ctx, live_under), fields(subsystem = LOG_TARGET))] -async fn query_live_candidates( - ctx: &mut Context, - live_under: &mut HashMap>, - relay_parent: Hash, -) -> Result<(HashMap, Vec)> +impl Subsystem for AvailabilityDistributionSubsystem where - Context: SubsystemContext, + Context: SubsystemContext + Sync + Send, { - // register one of relay parents (not the ancestors) - let ancestors = query_up_to_k_ancestors_in_same_session( - ctx, - relay_parent, - AvailabilityDistributionSubsystem::K, - ) - .await?; - - // query the ones that were not present in the live_under cache and add them - // to it. - let live_candidates = query_pending_availability_at( - ctx, - ancestors.iter().cloned().chain(iter::once(relay_parent)), - live_under, - ).await?; - - Ok((live_candidates, ancestors)) + fn start(self, ctx: Context) -> SpawnedSubsystem { + let future = self + .run(ctx, ProtocolState::new()) + .map_err(|e| SubsystemError::with_origin("availability-distribution", e)) + .boxed(); + + SpawnedSubsystem { + name: "availability-distribution-subsystem", + future, + } + } } -/// Obtain all live candidates for all given `relay_blocks`. -/// -/// This returns a set of all candidate hashes pending availability within the state -/// of the explicitly referenced relay heads. -/// -/// This also queries the provided `live_under` cache before reaching into the -/// runtime and updates it with the information learned. 
-#[tracing::instrument(level = "trace", skip(ctx, relay_blocks, live_under), fields(subsystem = LOG_TARGET))] -async fn query_pending_availability_at( - ctx: &mut Context, - relay_blocks: impl IntoIterator, - live_under: &mut HashMap>, -) -> Result> -where - Context: SubsystemContext, -{ - let mut live_candidates = HashMap::new(); - - // fetch and fill out cache for each of these - for relay_parent in relay_blocks { - let receipts_for = match live_under.entry(relay_parent) { - Entry::Occupied(e) => { - live_candidates.extend( - e.get().iter().cloned().map(|c| (c, FetchedLiveCandidate::Cached)) - ); - continue - }, - e => e.or_default(), - }; - - for (receipt_hash, descriptor) in query_pending_availability(ctx, relay_parent).await? { - // unfortunately we have no good way of telling the candidate was - // cached until now. But we don't clobber a `Cached` entry if there - // is one already. - live_candidates.entry(receipt_hash).or_insert(FetchedLiveCandidate::Fresh(descriptor)); - receipts_for.insert(receipt_hash); - } + +impl AvailabilityDistributionSubsystem { + /// Create a new instance of the availability distribution. + pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { + Self { keystore, metrics } } - Ok(live_candidates) + /// Start processing work as passed on from the Overseer. + async fn run(self, ctx: Context, state: &mut ProtocolState) -> Result<()> { + loop { + let message = ctx.recv().await?; + match message { + FromOverseer::Signal(OverseerSignal::ActiveLeaves(update)) => { + // Update the relay chain heads we are fetching our pieces for: + state.update_fetching_heads(&mut ctx, update)?; + } + FromOverseer::Signal(OverseerSignal::BlockFinalized(..)) => {} + FromOverseer::Signal(OverseerSignal::Conclude) => { + return Ok(()); + } + FromOverseer::Communication { + msg: AvailabilityDistributionMessage::AvailabilityFetchingRequest(_), + } => { + // TODO: Implement issue 2306: + tracing::warn!( + target: LOG_TARGET, + "To be implemented, see: https://github.com/paritytech/polkadot/issues/2306!", + ); + } + } + } + } } -/// Query all hashes and descriptors of candidates pending availability at a particular block. -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_pending_availability(ctx: &mut Context, relay_parent: Hash) - -> Result> -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - ctx.send_message(AllMessages::RuntimeApi(RuntimeApiMessage::Request( - relay_parent, - RuntimeApiRequest::AvailabilityCores(tx), - ))) - .await; - - let cores: Vec<_> = rx - .await - .map_err(|e| Error::AvailabilityCoresResponseChannel(e))? - .map_err(|e| Error::AvailabilityCores(e))?; - - Ok(cores.into_iter() - .filter_map(|core_state| if let CoreState::Occupied(occupied) = core_state { - Some((occupied.candidate_hash, occupied.candidate_descriptor)) - } else { - None - }) - .collect()) -} diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs new file mode 100644 index 000000000000..e9fcb7829134 --- /dev/null +++ b/node/network/availability-distribution/src/state.rs @@ -0,0 +1,173 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+ +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! `ProtocolState` representing a running availability distribution subsystem. + +use itertools::{Itertools, Either} + +use super::{Result, LOG_TARGET}; + +/// A running instance of this subsystem. +struct ProtocolState { + /// Candidates we need to fetch our chunk for. + fetches: HashMap, + + /// Localized information about sessions we are currently interested in. + /// + /// This is usually the current one and at session boundaries also the last one. + live_sessions: HashMap, +} + +/// Localized session information, tailored for the needs of availability distribution. +struct SessionInfo { + /// Validator groups of the current session. + /// + /// Each group's order is randomized. This way we achieve load balancing when requesting + /// chunks, as the validators in a group will be tried in that randomized order. Each node + /// should arrive at a different order, therefore we distribute the load. + validator_groups: Vec>, + + /// Information about ourself: + validator_id: ValidatorId, + + /// The relay parents we are keeping this entry for. + live_in: HashSet, +} + +struct ChunkFetchingInfo { + descriptor: CandidateDescriptor, + /// Validators that backed the candidate and hopefully have our chunk. + backing_group: Vec, +} + +impl ProtocolState { + /// Update heads that need availability distribution. + /// + /// For all active heads we will be fetching our chunk for availabilty distribution. + pub(crate) fn update_fetching_heads( + &mut self, + ctx: &mut Context, + update: ActiveLeavesUpdate, + ) -> Result<()> { + let ActiveLeavesUpdate { + activated, + deactivated, + } = update; + // Order important! We need to handle activated, prior to deactivated, otherwise we might + // cancel still needed jobs. + self.start_requesting_chunks(ctx, activated)?; + let dead_parents = self.stop_requesting_chunks(ctx, deactivated)?; + } + + /// Start requesting chunks for newly imported heads. + fn start_requesting_chunks( + &mut self, + ctx: &mut Context, + new_heads: &SmallVec<[(Hash, Arc)]>, + ) -> Result<()> { + for (leaf, _) in new_heads { + let cores = query_occupied_cores(ctx, leaf).await?; + add_cores(cores)?; + } + Ok(()) + } + + /// Stop requesting chunks for obsolete heads. + /// + /// Returns relay_parents which became irrelevant for availability fetching (are not + /// referenced by any candidate anymore). + fn stop_requesting_chunks( + &mut self, + ctx: &mut Context, + obsolete_leaves: &SmallVec<[(Hash, Arc)]>, + ) -> Result> { + let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().map(|h| h.0).collect(); + let (obsolete_parents, new_fetches): (HashSet<_>, HashMap<_>) = + self.fetches.into_iter().partition_map(|(c_hash, task)| { + task.remove_leaves(HashSet::from(obsolete_leaves)); + if task.is_finished() { + Either::Left(task.get_relay_parent()) + } else { + Either::Right((c_hash, task)) + } + }); + self.fetches = new_fetches; + obsolete_parents + } + + /// Add candidates corresponding for a particular relay parent. + /// + /// Starting requests where necessary. + /// + /// Note: The passed in `leaf` is not the same as CandidateDescriptor::relay_parent in the + /// given cores. 
The latter is the relay_parent this candidate considers its parent, while the + /// passed in leaf might be some later block where the candidate is still pending availability. + fn add_cores( + &mut self, + ctx: &mut Context, + leaf: Hash, + cores: impl IntoIter, + ) { + for core in cores { + match self.fetches.entry(core.candidate_hash) { + Entry::Occupied(e) => + // Just book keeping - we are already requesting that chunk: + e.relay_parents.insert(leaf), + Entry::Vacant(e) => { + e.insert(FetchTask::start(ctx, leaf, core)) + } + } + } + } +} + +/// Start requesting our chunk for the given core. +fn start_request_chunk(core: OccupiedCore) -> FetchTask { + panic!("TODO: To be implemented!"); +} + +/// Query all hashes and descriptors of candidates pending availability at a particular block. +#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] +async fn query_occupied_cores( + ctx: &mut Context, + relay_parent: Hash, +) -> Result> +where + Context: SubsystemContext, +{ + let (tx, rx) = oneshot::channel(); + ctx.send_message(AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::AvailabilityCores(tx), + ))) + .await; + + let cores: Vec<_> = rx + .await + .map_err(|e| Error::AvailabilityCoresResponseChannel(e))? + .map_err(|e| Error::AvailabilityCores(e))?; + + Ok(cores + .into_iter() + .filter_map(|core_state| { + if let CoreState::Occupied(occupied) = core_state { + Some(occupied) + } else { + None + } + }) + .collect()) +} From fbf0ec1a656b9a7afdfc9e9b282de816ac15582e Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 9 Feb 2021 21:29:04 +0100 Subject: [PATCH 03/60] Some docs on what I intend to do. --- .../availability-distribution/src/state.rs | 40 ++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index e9fcb7829134..4e71032c9411 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -15,7 +15,43 @@ // along with Polkadot. If not, see . //! `ProtocolState` representing a running availability distribution subsystem. - +//! +//! We keep track of [`FetchTask`]s, which get created on [`ActiveLeavesUpdate`]s for each occupied +//! core in the leaves, if we have not yet created it before. We keep track for which +//! relay parents a `FetchTask` is considered live (corresponding slot is occupied with the +//! candidate fetched). Once there is no relay parent left for which that task is considered live, +//! it gets removed. +//! +//! We keep that task around as long as its corresponding candidate is considered pending +//! availability, even if we fetched our chunk already. This is so we won't fetch our piece again, +//! just because the candidate is still pending availability in the next block. +//! +//! We are also dependent on session information. We need to know which validators are in a +//! particular validator group, backing our candidate, so we can request our erasure chunk from +//! them. +//! +//! We want to randomize the list of validators in each group, so we get a +//! random order of validators to try to get the chunk from. This is to ensure load balancing, each +//! requesting validator should have a different order, thus trying different validators. +//! +//! But We would like to keep that randomized order around for an entire session, so our particular +//! 
validator will always request from the same validators, thus making sure it will find an open +//! network connection on each request. +//! +//! (TODO: What to do on session boundaries? Initial delay acceptable? Connect with some fake +//! request to future validators? Use a peer set after all and connect that to the future session?) +//! +//! So we need to keep some customized session info around, which seems to be a good idea for +//! performance reasons anyway. That's where `SessionCache` comes into play. It is used to keep +//! session information around as long as we need it. But how long do we need it? How do we manage +//! that cache? We can't rely on `ActiveLeavesUpdate`s heads alone, as we might get occupied slots +//! for heads we never got an `ActiveLeavesUpdate` from, therefore we don't populate the session +//! cache with sessions our leaves correspond to, but directly with the sessions of the relay +//! parents of our `CandidateDescriptor`s. So, its clear how to populate the cache, but when can we +//! get rid of cached session information? Easy! When there is no candidate/FetchTask around +//! anymore which references it. Thus the cache simply consists of `Weak` pointers to the actual +//! session infos and the `FetchTask`s keep `Rc`s, therefore we know exactly when we can get rid of +//! a cache entry by means of the Weak pointer evaluating to `None`. use itertools::{Itertools, Either} use super::{Result, LOG_TARGET}; @@ -28,7 +64,7 @@ struct ProtocolState { /// Localized information about sessions we are currently interested in. /// /// This is usually the current one and at session boundaries also the last one. - live_sessions: HashMap, + live_sessions: HashMap>, } /// Localized session information, tailored for the needs of availability distribution. From ac543c1e394f88e931641503324b02d6b3156b6c Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Wed, 10 Feb 2021 15:49:48 +0100 Subject: [PATCH 04/60] Checkpoint of session cache implementation as I will likely replace it with something smarter. --- .../availability-distribution/src/error.rs | 7 ++ .../src/fetch_task.rs | 6 ++ .../availability-distribution/src/lib.rs | 3 + .../src/session_cache.rs | 100 ++++++++++++++++++ .../availability-distribution/src/state.rs | 27 ++--- 5 files changed, 122 insertions(+), 21 deletions(-) create mode 100644 node/network/availability-distribution/src/session_cache.rs diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index f70b2876a5ad..3e7b1cee07af 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -13,6 +13,9 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . +// + +use subsystem_util::Error as UtilError; #[derive(Debug, Error)] enum Error { @@ -47,6 +50,10 @@ enum Error { #[error("Receive channel closed")] IncomingMessageChannel(#[source] SubsystemError), + + /// Some request to the runtime in the session cache failed. 
+ #[error("Session cache runtime request failed")] + SessionCacheRuntimRequest(#[source] UtilError), } type Result = std::result::Result; diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index 8b583cb68f95..ff329993508c 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -14,6 +14,10 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . +use std::rc::Rc; + +use super::session_cache::SessionInfo; + struct FetchTask { /// For what relay parents this task is relevant. /// @@ -31,6 +35,8 @@ struct FetchTask { /// We keep the task around in state `Fetched` until `live_in` becomes empty, to make /// sure we won't re-fetch an already fetched candidate. state: FetchedState, + + session: Rc } /// State of a particular candidate chunk fetching process. diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 47115bece0ea..7c7c4c9cc64c 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -28,6 +28,9 @@ use state::ProtocolState; /// A task fetching a particular chunk. mod fetch_task; +/// Cache for session information. +mod session_cache; + const LOG_TARGET: &'static str = "availability_distribution"; diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs new file mode 100644 index 000000000000..a0c2acc0a84e --- /dev/null +++ b/node/network/availability-distribution/src/session_cache.rs @@ -0,0 +1,100 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use std::rc::Weak; + +use super::{LOG_TARGET, error::Result, Error}; + +/// Caching of session info as needed by availability distribution. +/// +/// It should be ensured that a cached session stays live in the cache as long as we might need it. +/// A warning will be logged, if an already dead entry gets fetched. +struct SessionCache { + /// Maintain caches for session information for currently relay parents of interest. + /// + /// Fast path - if we have an entry here, no query to the runtime is necessary at all. + by_relay_parent: HashMap, + + /// Look up cached sessions by SessionIndex. + /// + /// Slower path - we still have to look up the `SessionIndex` in the runtime, but still might have + /// the session ready already. + /// + /// Note: Performance of fetching is really secondary here, but we need to ensure we are going + /// to get any existing cache entry, before fetching new information, as we should not mess up + /// the order of validators. + by_session_index: HashMap, +} + +/// Localized session information, tailored for the needs of availability distribution. 
+pub struct SessionInfo { + /// Validator groups of the current session. + /// + /// Each group's order is randomized. This way we achieve load balancing when requesting + /// chunks, as the validators in a group will be tried in that randomized order. Each node + /// should arrive at a different order, therefore we distribute the load. + pub validator_groups: Vec>, + + /// Information about ourself: + pub our_index: ValidatorIndex, +} + +impl SessionCache { + + /// Retrieve session info for the given relay parent. + /// + /// This function will query the cache first and will only query the runtime on cache miss. + pub fn fetch_session_info(&mut self, ctx: &mut Context, relay_parent: Hash) -> Result> { + if let Some(info) = self.get_by_relay_parent(relay_parent) { + return Ok(info) + } + let session_index = request_session_index_for_child_ctx(parent, ctx).await + .map_err(|e| Error::SessionCacheRuntimRequest(e))?; + if let Some(info) = self.get_by_session_index(session_index) { + self.by_relay_parent.insert(relay_parent, info.downgrade); + return Ok(info); + } + + } + /// Get session info for a particular relay parent. + /// + /// Returns: None, if no entry for that relay parent exists in the cache (or it was dead + /// already - which should not happen.) + fn get_by_relay_parent(&self, relay_parent: Hash) -> Option> { + let weak_ref = self.by_relay_parent.get(relay_parent)?; + upgrade_report_dead(weak_ref) + } + + /// Get session info for a given `SessionIndex`. + fn get_by_session_index(&self, session_id: SessionId) -> Option> { + let weak_ref = self.by_session_id.get(session_id)?; + upgrade_report_dead(weak_ref) + } +} + +/// Upgrade a weak SessionInfo reference. +/// +/// Warn if it was dead already, as this should not happen. Cache should stay valid at least as +/// long as we need it. +fn upgrade_report_dead(info: Weak>) -> Option> { + match weak_ref.upgrade() { + Some(info) => Some(info), + None => { + tracing::warn!(LOG_TARGET, relay_parent, "A no longer cached session got requested, this should not happen in normal operation."); + None + } + } +} diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index 4e71032c9411..7c6760b51862 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -48,13 +48,13 @@ //! for heads we never got an `ActiveLeavesUpdate` from, therefore we don't populate the session //! cache with sessions our leaves correspond to, but directly with the sessions of the relay //! parents of our `CandidateDescriptor`s. So, its clear how to populate the cache, but when can we -//! get rid of cached session information? Easy! When there is no candidate/FetchTask around -//! anymore which references it. Thus the cache simply consists of `Weak` pointers to the actual -//! session infos and the `FetchTask`s keep `Rc`s, therefore we know exactly when we can get rid of -//! a cache entry by means of the Weak pointer evaluating to `None`. +//! get rid of cached session information? If for sure is safe to do when there is no +//! candidate/FetchTask around anymore which references it. Thus the cache simply consists of +//! `Weak` pointers to the actual session infos and the `FetchTask`s keep `Rc`s, therefore we know +//! exactly when we can get rid of a cache entry by means of the Weak pointer evaluating to `None`. 
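The `Weak`/`Rc` ownership scheme described in the comment above can be illustrated with plain `std` types. The following is only a minimal sketch of that idea, using hypothetical `Cache` and `Session` types rather than the subsystem's actual ones: fetch tasks hold `Rc`s, the cache holds only `Weak` pointers, so an entry becomes collectable exactly when the last task referencing it is dropped.

```rust
use std::collections::HashMap;
use std::rc::{Rc, Weak};

// Hypothetical stand-ins for SessionIndex / SessionInfo.
type SessionIndex = u32;
struct Session { _validators: Vec<u32> }

#[derive(Default)]
struct Cache {
    by_index: HashMap<SessionIndex, Weak<Session>>,
}

impl Cache {
    /// Return a still-live entry, or insert one produced by `make` and hand out an `Rc`.
    fn get_or_insert(&mut self, index: SessionIndex, make: impl FnOnce() -> Session) -> Rc<Session> {
        if let Some(live) = self.by_index.get(&index).and_then(Weak::upgrade) {
            return live;
        }
        let fresh = Rc::new(make());
        self.by_index.insert(index, Rc::downgrade(&fresh));
        fresh
    }

    /// Drop map entries whose sessions are no longer referenced by any task.
    fn bury_dead(&mut self) {
        self.by_index.retain(|_, weak| weak.upgrade().is_some());
    }
}

fn main() {
    let mut cache = Cache::default();
    // A "fetch task" keeps the strong reference.
    let task_session = cache.get_or_insert(1, || Session { _validators: vec![0, 1, 2] });
    cache.bury_dead();
    assert_eq!(cache.by_index.len(), 1); // still referenced by the task
    drop(task_session);
    cache.bury_dead();
    assert!(cache.by_index.is_empty()); // last Rc gone, entry can be reclaimed
}
```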
use itertools::{Itertools, Either} -use super::{Result, LOG_TARGET}; +use super::{Result, LOG_TARGET, session_cache::SessionCache}; /// A running instance of this subsystem. struct ProtocolState { @@ -64,24 +64,9 @@ struct ProtocolState { /// Localized information about sessions we are currently interested in. /// /// This is usually the current one and at session boundaries also the last one. - live_sessions: HashMap>, + session_cache: SessionCache, } -/// Localized session information, tailored for the needs of availability distribution. -struct SessionInfo { - /// Validator groups of the current session. - /// - /// Each group's order is randomized. This way we achieve load balancing when requesting - /// chunks, as the validators in a group will be tried in that randomized order. Each node - /// should arrive at a different order, therefore we distribute the load. - validator_groups: Vec>, - - /// Information about ourself: - validator_id: ValidatorId, - - /// The relay parents we are keeping this entry for. - live_in: HashSet, -} struct ChunkFetchingInfo { descriptor: CandidateDescriptor, From 07f6bc32ae14ac2f947ba127f3d401386a0a2f3d Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 11 Feb 2021 18:27:53 +0100 Subject: [PATCH 05/60] More work, mostly on cache and getting things to type check. --- .../availability-distribution/Cargo.toml | 2 + .../availability-distribution/src/error.rs | 31 ++--- .../src/fetch_task.rs | 58 +++++----- .../availability-distribution/src/lib.rs | 17 ++- .../src/session_cache.rs | 108 +++++++++++++++--- .../availability-distribution/src/state.rs | 78 +++++++------ node/subsystem-util/src/lib.rs | 2 +- primitives/src/v0.rs | 4 +- 8 files changed, 190 insertions(+), 110 deletions(-) diff --git a/node/network/availability-distribution/Cargo.toml b/node/network/availability-distribution/Cargo.toml index add6b2c43d33..1835078a3683 100644 --- a/node/network/availability-distribution/Cargo.toml +++ b/node/network/availability-distribution/Cargo.toml @@ -14,9 +14,11 @@ polkadot-erasure-coding = { path = "../../../erasure-coding" } polkadot-subsystem = { package = "polkadot-node-subsystem", path = "../../subsystem" } polkadot-node-network-protocol = { path = "../../network/protocol" } polkadot-node-subsystem-util = { path = "../../subsystem-util" } +polkadot-node-core-runtime-api = { path = "../../core/runtime-api" } sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", features = ["std"] } sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } thiserror = "1.0.23" +rand = "0.8.3" [dev-dependencies] polkadot-subsystem-testhelpers = { package = "polkadot-node-subsystem-test-helpers", path = "../../subsystem-test-helpers" } diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index 3e7b1cee07af..68551dbad62f 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -15,36 +15,21 @@ // along with Polkadot. If not, see . 
// -use subsystem_util::Error as UtilError; +use thiserror::Error; + +use futures::channel::oneshot; + +use polkadot_node_subsystem_util::Error as UtilError; +use polkadot_subsystem::{SubsystemError}; #[derive(Debug, Error)] -enum Error { +pub enum Error { #[error("Response channel to obtain StoreChunk failed")] StoreChunkResponseChannel(#[source] oneshot::Canceled), #[error("Response channel to obtain QueryChunk failed")] QueryChunkResponseChannel(#[source] oneshot::Canceled), - #[error("Response channel to obtain QueryAncestors failed")] - QueryAncestorsResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain QueryAncestors failed")] - QueryAncestors(#[source] ChainApiError), - - #[error("Response channel to obtain QuerySession failed")] - QuerySessionResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain QuerySession failed")] - QuerySession(#[source] RuntimeApiError), - - #[error("Response channel to obtain QueryValidators failed")] - QueryValidatorsResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain QueryValidators failed")] - QueryValidators(#[source] RuntimeApiError), - - #[error("Response channel to obtain AvailabilityCores failed")] - AvailabilityCoresResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain AvailabilityCores failed")] - AvailabilityCores(#[source] RuntimeApiError), - #[error("Response channel to obtain AvailabilityCores failed")] QueryAvailabilityResponseChannel(#[source] oneshot::Canceled), @@ -56,7 +41,7 @@ enum Error { SessionCacheRuntimRequest(#[source] UtilError), } -type Result = std::result::Result; +pub type Result = std::result::Result; impl From for Error { fn from(err: SubsystemError) -> Self { diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index ff329993508c..d84d24001aa9 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -15,10 +15,26 @@ // along with Polkadot. If not, see . use std::rc::Rc; - -use super::session_cache::SessionInfo; - -struct FetchTask { +use std::collections::HashSet; + +use futures::channel::oneshot; + +use polkadot_primitives::v1::{ + BlakeTwo256, CoreState, ErasureChunk, Hash, HashT, + SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, CandidateHash, + CandidateDescriptor, OccupiedCore, +}; +use polkadot_subsystem::{ + jaeger, errors::{ChainApiError, RuntimeApiError}, PerLeafSpan, + ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, +}; +use polkadot_subsystem::messages::{ + AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage, + NetworkBridgeMessage, RuntimeApiMessage, RuntimeApiRequest, NetworkBridgeEvent +}; +use super::{session_cache::SessionInfo, LOG_TARGET}; + +pub struct FetchTask { /// For what relay parents this task is relevant. /// /// In other words, for which relay chain parents this candidate is considered live. @@ -50,9 +66,13 @@ enum FetchedState { } impl FetchTask { - /// Start fetching a chunk. - pub async fn start(ctx: &mut Context, leaf: Hash, core: OccupiedCore) -> Self { - } +// /// Start fetching a chunk. + // pub async fn start(ctx: &mut Context, leaf: Hash, core: OccupiedCore) -> Self + // where + // Context: SubsystemContext, + // { + // panic + // } /// Add the given leaf to the relay parents which are making this task relevant. 
pub fn add_leaf(&mut self, leaf: Hash) { @@ -73,7 +93,7 @@ impl FetchTask { /// /// That is, it is either canceled or succeeded fetching the chunk. pub fn is_finished(&self) -> bool { - match state { + match self.state { FetchedState::Fetched | FetchedState::Canceled => true, FetchedState::Fetching => false, } @@ -84,25 +104,3 @@ impl FetchTask { self.relay_parent } } - -/// Query the session index of a relay parent -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_session_index_for_child( - ctx: &mut Context, - relay_parent: Hash, -) -> Result -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - let query_session_idx_for_child = AllMessages::RuntimeApi(RuntimeApiMessage::Request( - relay_parent, - RuntimeApiRequest::SessionIndexForChild(tx), - )); - - ctx.send_message(query_session_idx_for_child) - .await; - rx.await - .map_err(|e| Error::QuerySessionResponseChannel(e))? - .map_err(|e| Error::QuerySession(e)) -} diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 7c7c4c9cc64c..c0792db5d722 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -15,6 +15,13 @@ // along with Polkadot. If not, see . +use sp_keystore::SyncCryptoStorePtr; + +use polkadot_subsystem::{ + jaeger, errors::{ChainApiError, RuntimeApiError}, PerLeafSpan, + ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, messages::AvailabilityDistributionMessage +}; + /// Error and [`Result`] type for this subsystem. mod error; pub use error::Error; @@ -34,6 +41,11 @@ mod session_cache; const LOG_TARGET: &'static str = "availability_distribution"; + +/// Availability Distribution metrics. +/// TODO: Dummy for now. +type Metrics = (); + /// The bitfield distribution subsystem. pub struct AvailabilityDistributionSubsystem { /// Pointer to a keystore, which is required for determining this nodes validator index. @@ -67,7 +79,10 @@ impl AvailabilityDistributionSubsystem { } /// Start processing work as passed on from the Overseer. - async fn run(self, ctx: Context, state: &mut ProtocolState) -> Result<()> { + async fn run(self, mut ctx: Context, state: &mut ProtocolState) -> Result<()> + where + Context: SubsystemContext + Sync + Send, + { loop { let message = ctx.recv().await?; match message { diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index a0c2acc0a84e..c5c34f35f002 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -14,19 +14,36 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
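The `live_in` bookkeeping that `FetchTask` performs above can be reduced to a few lines of plain `std` code. This is only an illustrative sketch with a hypothetical `Task` type and `u64` hashes, not the real subsystem types: a task stays relevant while at least one active leaf still references its candidate, and becomes removable once the last such leaf is deactivated.

```rust
use std::collections::HashSet;

// Hypothetical stand-in for a relay chain block hash.
type Hash = u64;

struct Task {
    /// Relay chain heads for which this candidate is still pending availability.
    live_in: HashSet<Hash>,
}

impl Task {
    fn add_leaf(&mut self, leaf: Hash) {
        self.live_in.insert(leaf);
    }

    /// Remove deactivated leaves; the task can be dropped once none remain.
    fn remove_leaves(&mut self, deactivated: &HashSet<Hash>) -> bool {
        self.live_in.retain(|leaf| !deactivated.contains(leaf));
        self.live_in.is_empty()
    }
}

fn main() {
    let mut task = Task { live_in: HashSet::new() };
    task.add_leaf(1);
    task.add_leaf(2);
    // Leaf 1 goes out of scope, but leaf 2 still needs the chunk:
    assert!(!task.remove_leaves(&[1].into_iter().collect()));
    // Once leaf 2 is gone as well, the task is obsolete and can be cleaned up:
    assert!(task.remove_leaves(&[2].into_iter().collect()));
}
```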
-use std::rc::Weak; +use std::collections::HashMap; +use std::rc::{Rc, Weak}; -use super::{LOG_TARGET, error::Result, Error}; +use rand::{seq::SliceRandom, thread_rng}; + +use sp_keystore::{CryptoStore, SyncCryptoStorePtr}; + +use super::{error::Result, Error, LOG_TARGET}; +use polkadot_node_subsystem_util::{ + request_session_index_for_child_ctx, request_validator_groups_ctx, request_validators_ctx, +}; +use polkadot_primitives::v1::{ + BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, + SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, +}; +use polkadot_subsystem::{ + errors::{ChainApiError, RuntimeApiError}, + jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, + Subsystem, SubsystemContext, SubsystemError, +}; /// Caching of session info as needed by availability distribution. /// /// It should be ensured that a cached session stays live in the cache as long as we might need it. /// A warning will be logged, if an already dead entry gets fetched. -struct SessionCache { +pub struct SessionCache { /// Maintain caches for session information for currently relay parents of interest. /// /// Fast path - if we have an entry here, no query to the runtime is necessary at all. - by_relay_parent: HashMap, + by_relay_parent: HashMap>, /// Look up cached sessions by SessionIndex. /// @@ -36,7 +53,10 @@ struct SessionCache { /// Note: Performance of fetching is really secondary here, but we need to ensure we are going /// to get any existing cache entry, before fetching new information, as we should not mess up /// the order of validators. - by_session_index: HashMap, + by_session_index: HashMap>, + + /// Key store for determining whether we are a validator and what `ValidatorIndex` we have. + keystore: SyncCryptoStorePtr, } /// Localized session information, tailored for the needs of availability distribution. @@ -48,27 +68,60 @@ pub struct SessionInfo { /// should arrive at a different order, therefore we distribute the load. pub validator_groups: Vec>, + /// All validators of that session. + /// + /// Needed for authority discovery and finding ourselves. + pub validators: Vec, + /// Information about ourself: pub our_index: ValidatorIndex, } impl SessionCache { - /// Retrieve session info for the given relay parent. /// /// This function will query the cache first and will only query the runtime on cache miss. - pub fn fetch_session_info(&mut self, ctx: &mut Context, relay_parent: Hash) -> Result> { - if let Some(info) = self.get_by_relay_parent(relay_parent) { - return Ok(info) + /// + /// Returns: `Ok(None)` in case this node is not a validator in the current session. + pub async fn fetch_session_info( + &mut self, + ctx: &mut Context, + parent: Hash, + ) -> Result>> + where + Context: SubsystemContext, + { + if let Some(info) = self.get_by_relay_parent(parent) { + return Ok(Some(info)); } - let session_index = request_session_index_for_child_ctx(parent, ctx).await + let session_index = request_session_index_for_child_ctx(parent, ctx) + .await? + .await .map_err(|e| Error::SessionCacheRuntimRequest(e))?; if let Some(info) = self.get_by_session_index(session_index) { - self.by_relay_parent.insert(relay_parent, info.downgrade); - return Ok(info); + self.by_relay_parent.insert(parent, info.downgrade()); + return Ok(Some(info)); } - + if let Some((our_index, validators)) = self.query_validator_info(ctx, parent).await? 
{ + let (mut validator_groups, _) = request_validator_groups_ctx(parent, ctx).await?.await?; + // Shuffle validators in groups: + let mut rng = thread_rng(); + for g in validator_groups.iter_mut() { + g.shuffle(&rng) + } + let info = Rc::new(SessionInfo { + validator_groups, + validators, + our_index, + }); + let downgraded = info.downgrade(); + self.by_relay_parent.insert(parent, downgraded); + self.get_by_session_index.insert(session_index, downgraded); + return Ok(Some(info)); + } + Ok(None) } + /// Get session info for a particular relay parent. /// /// Returns: None, if no entry for that relay parent exists in the cache (or it was dead @@ -79,21 +132,40 @@ impl SessionCache { } /// Get session info for a given `SessionIndex`. - fn get_by_session_index(&self, session_id: SessionId) -> Option> { - let weak_ref = self.by_session_id.get(session_id)?; + fn get_by_session_index(&self, session_index: SessionIndex) -> Option> { + let weak_ref = self.by_session_index.get(session_index)?; upgrade_report_dead(weak_ref) } + + /// Get our validator id and the validators in the current session. + /// + /// Returns: Ok(None) if we are not a validator. + async fn query_validator_info( + &self, + &ctx: &mut Context, + parent: Hash, + ) -> Result)>> { + let validators = request_validators_ctx(ctx, parent).await?.await?; + for (i, v) in validators.iter().enumerate() { + if CryptoStore::has_keys(&*self.keystore, &[(v.to_raw_vec(), ValidatorId::ID)]) + .await + { + return Ok(Some((i as ValidatorIndex, validators))); + } + } + Ok(None) + } } /// Upgrade a weak SessionInfo reference. /// /// Warn if it was dead already, as this should not happen. Cache should stay valid at least as /// long as we need it. -fn upgrade_report_dead(info: Weak>) -> Option> { - match weak_ref.upgrade() { +fn upgrade_report_dead(info: Weak) -> Option> { + match info.upgrade() { Some(info) => Some(info), None => { - tracing::warn!(LOG_TARGET, relay_parent, "A no longer cached session got requested, this should not happen in normal operation."); + tracing::warn!(LOG_TARGET, "A no longer cached session got requested, this should not happen in normal operation."); None } } diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index 7c6760b51862..f47db8c94efa 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -52,12 +52,33 @@ //! candidate/FetchTask around anymore which references it. Thus the cache simply consists of //! `Weak` pointers to the actual session infos and the `FetchTask`s keep `Rc`s, therefore we know //! exactly when we can get rid of a cache entry by means of the Weak pointer evaluating to `None`. 
-use itertools::{Itertools, Either} -use super::{Result, LOG_TARGET, session_cache::SessionCache}; +use std::collections::{ + hash_map::{Entry, HashMap}, + hash_set::HashSet, +}; +use std::iter::IntoIterator; +use std::sync::Arc; + +use futures::channel::oneshot; +use jaeger::JaegerSpan; + +use itertools::{Either, Itertools}; + +use super::{fetch_task::FetchTask, session_cache::SessionCache, Result, LOG_TARGET}; +use polkadot_primitives::v1::{ + BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, + OccupiedCore, SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, +}; +use polkadot_subsystem::{ + errors::{ChainApiError, RuntimeApiError}, + jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, + Subsystem, SubsystemContext, SubsystemError, messages::AvailabilityDistributionMessage, +}; +use polkadot_node_subsystem_util::request_availability_cores_ctx; /// A running instance of this subsystem. -struct ProtocolState { +pub struct ProtocolState { /// Candidates we need to fetch our chunk for. fetches: HashMap, @@ -67,7 +88,6 @@ struct ProtocolState { session_cache: SessionCache, } - struct ChunkFetchingInfo { descriptor: CandidateDescriptor, /// Validators that backed the candidate and hopefully have our chunk. @@ -78,7 +98,7 @@ impl ProtocolState { /// Update heads that need availability distribution. /// /// For all active heads we will be fetching our chunk for availabilty distribution. - pub(crate) fn update_fetching_heads( + pub(crate) fn update_fetching_heads( &mut self, ctx: &mut Context, update: ActiveLeavesUpdate, @@ -94,14 +114,17 @@ impl ProtocolState { } /// Start requesting chunks for newly imported heads. - fn start_requesting_chunks( + async fn start_requesting_chunks( &mut self, ctx: &mut Context, - new_heads: &SmallVec<[(Hash, Arc)]>, - ) -> Result<()> { + new_heads: impl Iterator)>, + ) -> Result<()> + where + Context: SubsystemContext + Sync + Send, + { for (leaf, _) in new_heads { let cores = query_occupied_cores(ctx, leaf).await?; - add_cores(cores)?; + self.add_cores(ctx, leaf, cores)?; } Ok(()) } @@ -110,10 +133,10 @@ impl ProtocolState { /// /// Returns relay_parents which became irrelevant for availability fetching (are not /// referenced by any candidate anymore). - fn stop_requesting_chunks( + fn stop_requesting_chunks( &mut self, ctx: &mut Context, - obsolete_leaves: &SmallVec<[(Hash, Arc)]>, + obsolete_leaves: impl Iterator)>, ) -> Result> { let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().map(|h| h.0).collect(); let (obsolete_parents, new_fetches): (HashSet<_>, HashMap<_>) = @@ -136,50 +159,35 @@ impl ProtocolState { /// Note: The passed in `leaf` is not the same as CandidateDescriptor::relay_parent in the /// given cores. The latter is the relay_parent this candidate considers its parent, while the /// passed in leaf might be some later block where the candidate is still pending availability. - fn add_cores( + fn add_cores( &mut self, ctx: &mut Context, leaf: Hash, - cores: impl IntoIter, + cores: impl IntoIterator, ) { for core in cores { match self.fetches.entry(core.candidate_hash) { Entry::Occupied(e) => // Just book keeping - we are already requesting that chunk: - e.relay_parents.insert(leaf), - Entry::Vacant(e) => { - e.insert(FetchTask::start(ctx, leaf, core)) + { + e.relay_parents.insert(leaf) } + Entry::Vacant(e) => e.insert(FetchTask::start(ctx, leaf, core)), } } } } -/// Start requesting our chunk for the given core. 
-fn start_request_chunk(core: OccupiedCore) -> FetchTask { - panic!("TODO: To be implemented!"); -} - -/// Query all hashes and descriptors of candidates pending availability at a particular block. -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] +///// Query all hashes and descriptors of candidates pending availability at a particular block. +// #[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] async fn query_occupied_cores( ctx: &mut Context, relay_parent: Hash, ) -> Result> where - Context: SubsystemContext, + Context: SubsystemContext, { - let (tx, rx) = oneshot::channel(); - ctx.send_message(AllMessages::RuntimeApi(RuntimeApiMessage::Request( - relay_parent, - RuntimeApiRequest::AvailabilityCores(tx), - ))) - .await; - - let cores: Vec<_> = rx - .await - .map_err(|e| Error::AvailabilityCoresResponseChannel(e))? - .map_err(|e| Error::AvailabilityCores(e))?; + let cores = request_availability_cores_ctx(relay_parent, ctx).await?.await; Ok(cores .into_iter() diff --git a/node/subsystem-util/src/lib.rs b/node/subsystem-util/src/lib.rs index 1eee4cc7f758..ace6f90d80bc 100644 --- a/node/subsystem-util/src/lib.rs +++ b/node/subsystem-util/src/lib.rs @@ -321,7 +321,7 @@ impl Validator { .iter() .enumerate() .find(|(_, k)| k == &&key) - .map(|(idx, _)| idx as ValidatorIndex) + .map(|(idx, _)| ValidatorIndex(idx as u32)) .expect("signing_key would have already returned NotAValidator if the item we're searching for isn't in this list; qed"); Ok(Validator { diff --git a/primitives/src/v0.rs b/primitives/src/v0.rs index 951624566172..c8ea0fd6a014 100644 --- a/primitives/src/v0.rs +++ b/primitives/src/v0.rs @@ -114,8 +114,8 @@ impl MallocSizeOf for ValidatorId { } /// Index of the validator is used as a lightweight replacement of the `ValidatorId` when appropriate. -#[derive(Debug, Eq, Ord, PartialEq, PartialOrd, Hash)] -pub struct ValidatorIndex(u32); +#[derive(Debug, Eq, Ord, PartialEq, PartialOrd, Hash, Copy, Clone, Encode, Decode, MallocSizeOf)] +pub struct ValidatorIndex(pub u32); application_crypto::with_pair! { /// A Parachain validator keypair. From ef84ea5a447debadbdde2d41048ef304e1ecf6f6 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 12 Feb 2021 10:51:50 +0100 Subject: [PATCH 06/60] Only derive MallocSizeOf and Debug for std. --- primitives/src/v0.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/primitives/src/v0.rs b/primitives/src/v0.rs index c8ea0fd6a014..8c6b4f538a54 100644 --- a/primitives/src/v0.rs +++ b/primitives/src/v0.rs @@ -114,7 +114,9 @@ impl MallocSizeOf for ValidatorId { } /// Index of the validator is used as a lightweight replacement of the `ValidatorId` when appropriate. -#[derive(Debug, Eq, Ord, PartialEq, PartialOrd, Hash, Copy, Clone, Encode, Decode, MallocSizeOf)] +#[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Copy, Clone, Encode, Decode)] +#[cfg(feature = "std")] +#[derive(Debug, MallocSizeOf)] pub struct ValidatorIndex(pub u32); application_crypto::with_pair! { From 1e3580413a35f99d403cbb36dc7f8d979e7d3f46 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 12 Feb 2021 13:59:13 +0100 Subject: [PATCH 07/60] availability-distribution: Cache feature complete. 
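The load-balancing rationale behind the group shuffling implemented in this patch (every node derives its own request order, so chunk requests spread across the whole backing group) can be demonstrated with the `rand` 0.8 crate added earlier in the series. This is only an illustrative sketch, with plain integers standing in for validator indices; run on two different nodes it would, with high probability, produce two different orders.

```rust
// Assumes rand = "0.8" (added to Cargo.toml earlier in this patch series).
use rand::{seq::SliceRandom, thread_rng};

fn main() {
    // Two hypothetical backing groups, as delivered by the runtime.
    let mut validator_groups: Vec<Vec<u32>> = vec![vec![0, 1, 2, 3], vec![4, 5, 6, 7]];

    // Shuffle each group once per session; every node ends up with its own order,
    // so the first validator asked for a chunk differs from node to node.
    let mut rng = thread_rng();
    for group in validator_groups.iter_mut() {
        group.shuffle(&mut rng);
    }

    // Requests would then be tried in this per-node order:
    println!("{:?}", validator_groups);
}
```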
--- .../availability-distribution/Cargo.toml | 3 +- .../src/session_cache.rs | 62 +++++++++++++------ 2 files changed, 44 insertions(+), 21 deletions(-) diff --git a/node/network/availability-distribution/Cargo.toml b/node/network/availability-distribution/Cargo.toml index 1835078a3683..0ac0ed535418 100644 --- a/node/network/availability-distribution/Cargo.toml +++ b/node/network/availability-distribution/Cargo.toml @@ -15,15 +15,16 @@ polkadot-subsystem = { package = "polkadot-node-subsystem", path = "../../subsys polkadot-node-network-protocol = { path = "../../network/protocol" } polkadot-node-subsystem-util = { path = "../../subsystem-util" } polkadot-node-core-runtime-api = { path = "../../core/runtime-api" } +sp-application-crypto = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", features = ["std"] } sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } thiserror = "1.0.23" +itertools = "0.10.0" rand = "0.8.3" [dev-dependencies] polkadot-subsystem-testhelpers = { package = "polkadot-node-subsystem-test-helpers", path = "../../subsystem-test-helpers" } sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", features = ["std"] } -sp-application-crypto = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-keyring = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" } sc-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index c5c34f35f002..0f12ee66173c 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -19,9 +19,10 @@ use std::rc::{Rc, Weak}; use rand::{seq::SliceRandom, thread_rng}; +use sp_application_crypto::AppKey; +use sp_core::crypto::Public; use sp_keystore::{CryptoStore, SyncCryptoStorePtr}; -use super::{error::Result, Error, LOG_TARGET}; use polkadot_node_subsystem_util::{ request_session_index_for_child_ctx, request_validator_groups_ctx, request_validators_ctx, }; @@ -30,11 +31,15 @@ use polkadot_primitives::v1::{ SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, }; use polkadot_subsystem::{ - errors::{ChainApiError, RuntimeApiError}, jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, }; +use super::{ + error::{recv_runtime, Result}, + Error, LOG_TARGET, +}; + /// Caching of session info as needed by availability distribution. /// /// It should be ensured that a cached session stays live in the cache as long as we might need it. @@ -94,29 +99,35 @@ impl SessionCache { if let Some(info) = self.get_by_relay_parent(parent) { return Ok(Some(info)); } - let session_index = request_session_index_for_child_ctx(parent, ctx) - .await? 
- .await - .map_err(|e| Error::SessionCacheRuntimRequest(e))?; + let session_index = + recv_runtime(request_session_index_for_child_ctx(parent, ctx).await).await?; if let Some(info) = self.get_by_session_index(session_index) { - self.by_relay_parent.insert(parent, info.downgrade()); + self.by_relay_parent.insert(parent, Rc::downgrade(&info)); return Ok(Some(info)); } + + // About to fetch new stuff, time to get rid of dead bodies: We keep relay_parent to + // session info matches way longer than necessary (for an entire session), but the overhead + // should be low enough to not matter. + self.bury_dead(); if let Some((our_index, validators)) = self.query_validator_info(ctx, parent).await? { - let (mut validator_groups, _) = request_validator_groups_ctx(parent, ctx).await?.await?; + let (mut validator_groups, _) = + recv_runtime(request_validator_groups_ctx(parent, ctx).await).await?; + // Shuffle validators in groups: let mut rng = thread_rng(); for g in validator_groups.iter_mut() { - g.shuffle(&rng) + g.shuffle(&mut rng) } + let info = Rc::new(SessionInfo { validator_groups, validators, our_index, }); - let downgraded = info.downgrade(); - self.by_relay_parent.insert(parent, downgraded); - self.get_by_session_index.insert(session_index, downgraded); + let downgraded = Rc::downgrade(&info); + self.by_relay_parent.insert(parent, downgraded.clone()); + self.by_session_index.insert(session_index, downgraded); return Ok(Some(info)); } Ok(None) @@ -126,14 +137,14 @@ impl SessionCache { /// /// Returns: None, if no entry for that relay parent exists in the cache (or it was dead /// already - which should not happen.) - fn get_by_relay_parent(&self, relay_parent: Hash) -> Option> { - let weak_ref = self.by_relay_parent.get(relay_parent)?; + fn get_by_relay_parent(&self, parent: Hash) -> Option> { + let weak_ref = self.by_relay_parent.get(&parent)?; upgrade_report_dead(weak_ref) } /// Get session info for a given `SessionIndex`. fn get_by_session_index(&self, session_index: SessionIndex) -> Option> { - let weak_ref = self.by_session_index.get(session_index)?; + let weak_ref = self.by_session_index.get(&session_index)?; upgrade_report_dead(weak_ref) } @@ -142,26 +153,37 @@ impl SessionCache { /// Returns: Ok(None) if we are not a validator. async fn query_validator_info( &self, - &ctx: &mut Context, + ctx: &mut Context, parent: Hash, - ) -> Result)>> { - let validators = request_validators_ctx(ctx, parent).await?.await?; + ) -> Result)>> + where + Context: SubsystemContext, + { + let validators = recv_runtime(request_validators_ctx(parent, ctx).await).await?; for (i, v) in validators.iter().enumerate() { if CryptoStore::has_keys(&*self.keystore, &[(v.to_raw_vec(), ValidatorId::ID)]) .await { - return Ok(Some((i as ValidatorIndex, validators))); + return Ok(Some((ValidatorIndex(i as u32), validators))); } } Ok(None) } + + /// Get rid of the dead bodies from time to time. + fn bury_dead(&mut self) { + self.by_session_index + .retain(|_, info| info.upgrade().is_some()); + self.by_relay_parent + .retain(|_, info| info.upgrade().is_some()); + } } /// Upgrade a weak SessionInfo reference. /// /// Warn if it was dead already, as this should not happen. Cache should stay valid at least as /// long as we need it. 
-fn upgrade_report_dead(info: Weak) -> Option> { +fn upgrade_report_dead(info: &Weak) -> Option> { match info.upgrade() { Some(info) => Some(info), None => { From d8fda81ec6726172da3a72b6b0fb1b33539f9b22 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Sat, 13 Feb 2021 09:59:15 +0100 Subject: [PATCH 08/60] Sketch out logic in `FetchTask` for actual fetching. - Compile fixes. - Cleanup. --- .../availability-distribution/src/error.rs | 32 ++- .../src/fetch_task.rs | 216 +++++++++++++++--- .../src/session_cache.rs | 7 +- .../availability-distribution/src/state.rs | 51 +++-- 4 files changed, 247 insertions(+), 59 deletions(-) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index 68551dbad62f..0e29294b2c0c 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -20,7 +20,10 @@ use thiserror::Error; use futures::channel::oneshot; use polkadot_node_subsystem_util::Error as UtilError; -use polkadot_subsystem::{SubsystemError}; +use polkadot_subsystem::{ + errors::{ChainApiError, RuntimeApiError}, + SubsystemError, +}; #[derive(Debug, Error)] pub enum Error { @@ -36,9 +39,17 @@ pub enum Error { #[error("Receive channel closed")] IncomingMessageChannel(#[source] SubsystemError), - /// Some request to the runtime in the session cache failed. - #[error("Session cache runtime request failed")] - SessionCacheRuntimRequest(#[source] UtilError), + /// Some request to utility functions failed. + #[error("Runtime request failed")] + UtilRequest(#[source] UtilError), + + /// Some request to the runtime failed. + #[error("Runtime request failed")] + RuntimeRequestCanceled(#[source] oneshot::Canceled), + + /// Some request to the runtime failed. + #[error("Runtime request failed")] + RuntimeRequest(#[source] RuntimeApiError), } pub type Result = std::result::Result; @@ -48,3 +59,16 @@ impl From for Error { Self::IncomingMessageChannel(err) } } + +/// Receive a response from a runtime request and convert errors. +pub(crate) async fn recv_runtime( + r: std::result::Result< + oneshot::Receiver>, + UtilError, + >, +) -> Result { + r.map_err(Error::UtilRequest)? + .await + .map_err(Error::RuntimeRequestCanceled)? + .map_err(Error::RuntimeRequest) +} diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index d84d24001aa9..e1c8409db625 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -14,25 +14,27 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
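The `recv_runtime` helper introduced above collapses three failure layers (failing to send the request, the response channel being dropped, and the runtime API itself erroring) into the module's single `Error` type. Below is a simplified sketch of that flattening, using placeholder error payloads instead of the real `UtilError`, `oneshot::Canceled` and `RuntimeApiError`.

```rust
// Placeholder error types standing in for UtilError, oneshot::Canceled and RuntimeApiError.
#[derive(Debug)]
enum Error {
    UtilRequest(String),
    RuntimeRequestCanceled,
    RuntimeRequest(String),
}

type Result<T> = std::result::Result<T, Error>;

/// The three nested layers a runtime request can fail at, collapsed into one `Result`.
fn flatten<T>(
    r: std::result::Result<std::result::Result<std::result::Result<T, String>, ()>, String>,
) -> Result<T> {
    r.map_err(Error::UtilRequest)? // failed to even send the request
        .map_err(|()| Error::RuntimeRequestCanceled)? // response channel dropped
        .map_err(Error::RuntimeRequest) // runtime API reported an error
}

fn main() {
    assert!(matches!(flatten(Ok(Ok(Ok(42u32)))), Ok(42)));
    assert!(matches!(flatten::<u32>(Ok(Err(()))), Err(Error::RuntimeRequestCanceled)));
}
```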
-use std::rc::Rc; use std::collections::HashSet; +use std::rc::Rc; use futures::channel::oneshot; +use v1::AvailabilityFetchingResponse; +use super::{session_cache::SessionInfo, LOG_TARGET}; +use polkadot_node_network_protocol::request_response::v1; use polkadot_primitives::v1::{ - BlakeTwo256, CoreState, ErasureChunk, Hash, HashT, - SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, CandidateHash, - CandidateDescriptor, OccupiedCore, -}; -use polkadot_subsystem::{ - jaeger, errors::{ChainApiError, RuntimeApiError}, PerLeafSpan, - ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, + BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, + OccupiedCore, SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, }; use polkadot_subsystem::messages::{ AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage, - NetworkBridgeMessage, RuntimeApiMessage, RuntimeApiRequest, NetworkBridgeEvent + NetworkBridgeEvent, NetworkBridgeMessage, RuntimeApiMessage, RuntimeApiRequest, +}; +use polkadot_subsystem::{ + errors::{ChainApiError, RuntimeApiError}, + jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, + Subsystem, SubsystemContext, SubsystemError, }; -use super::{session_cache::SessionInfo, LOG_TARGET}; pub struct FetchTask { /// For what relay parents this task is relevant. @@ -42,37 +44,109 @@ pub struct FetchTask { /// stop keeping track of that candidate/chunk. live_in: HashSet, - /// The relay parent providing the context for the candidate. - relay_parent: Hash, - - /// Some details about the to be fetched candidate. - descriptor: CandidateDescriptor, - /// We keep the task around in state `Fetched` until `live_in` becomes empty, to make /// sure we won't re-fetch an already fetched candidate. state: FetchedState, - session: Rc + /// Session information. + session: Rc, } /// State of a particular candidate chunk fetching process. enum FetchedState { /// Chunk is currently being fetched. - Fetching, + /// + /// Once the contained `Sender` is dropped, any still running task will be canceled. + Fetching(oneshot::Sender<()>), /// Chunk has already been fetched successfully. Fetched, /// All relevant live_in have been removed, before we were able to get our chunk. Canceled, } +/// Messages sent from `FetchTask`s to be handled/forwarded. +pub enum FromFetchTask { + /// Message to other subsystem. + Message(AllMessages), + + /// Concluded with result. + /// + /// In case of `None` everything was fine, in case of `Some` some validators in the group + /// did not serve us our chunk as expected. + Concluded(Option), +} + +/// Report of bad validators. +pub struct BadValidators { + /// The session index that was used. + pub session_index: SessionIndex, + /// The group the not properly responding validators are. + pub group_index: GroupIndex, + /// The indeces of the bad validators. + pub bad_validators: Vec, +} + +/// Information a running task needs. +struct RunningTask { + /// For what session we have been spawned. + session_index: SessionIndex, + + /// Index of validator group. + group_index: GroupIndex, + + /// Validators to request the chunk from. + group: Vec, + + /// The request to send. + request: v1::AvailabilityFetchingRequest, + + /// Root hash, for verifying the chunks validity. + erasure_root: Hash, + + /// Relay parent of the candidate to fetch. 
+ relay_parent: Hash, + + /// Sender for communicating with other subsystems and reporting results. + sender: mpsc::Sender, + + /// Receive `Canceled` errors here. + receiver: oneshot::Receiver<()>, +} + impl FetchTask { -// /// Start fetching a chunk. - // pub async fn start(ctx: &mut Context, leaf: Hash, core: OccupiedCore) -> Self - // where - // Context: SubsystemContext, - // { - // panic - // } + /// Start fetching a chunk. + pub async fn start( + ctx: &mut Context, + leaf: Hash, + core: OccupiedCore, + session_info: Rc, + sender: mpsc::Sender, + ) -> Self + where + Context: SubsystemContext, + { + let (handle, receiver) = oneshot::channel(); + let running = RunningTask { + session_index: session_info.session_index, + group_index: core.group_responsible, + group: session_info.validator_groups.get(core.group_responsible).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), + request: v1::AvailabilityFetchingRequest { + candidate_hash: core.candidate_hash, + index: session_info.our_index, + }, + erasure_root: core.candidate_descriptor.erasure_root, + relay_parent: core.candidate_descriptor.relay_parent, + sender, + receiver, + }; + ctx.spawn("chunk-fetcher", Pin::new(Box::new(running.run()))) + .await?; + FetchTask { + live_in: HashSet::from(leaf), + state: FetchedState::Fetching(handle), + session: session_info, + } + } /// Add the given leaf to the relay parents which are making this task relevant. pub fn add_leaf(&mut self, leaf: Hash) { @@ -101,6 +175,96 @@ impl FetchTask { /// Retrieve the relay parent providing the context for this candidate. pub fn get_relay_parent(&self) -> Hash { - self.relay_parent + self.descriptor.relay_parent + } +} + +/// Things that can go wrong in task execution. +#[derive(Debug)] +enum TaskError { + /// The peer failed to deliver a correct chunk for some reason (has been reported as + /// appropriate). + PeerError, + /// This very node is seemingly shutting down (sending of message failed). + ShuttingDown, +} + +type Result = std::result::Result; + +impl RunningTask { + async fn run(self) { + let bad_validators = Vec::new(); + // Try validators in order: + for index in self.group { + + // Send request: + let resp = match do_request(index).await { + Ok(resp) => resp, + Err(TaskError::ShuttingDown) => { + tracking::info("Node seems to be shutting down, canceling fetch task"); + return; + } + Err(TaskError::PeerError) => { + bad_validators.push(index); + continue + } + }; + + // Data valid? + if !self.validate_response(&resp) { + bad_validators.push(index); + continue + } + + // Ok, let's store it and be happy. + store_response(resp); + break; + } + conclude(bad_validators); + } + + /// Do request and return response, if successful. + /// + /// Will also report peer if not successful. 
+ async fn do_request(&self, validator: ValidatorIndex) -> std::result::Result { + let peer = self.get_peer_id(index)?; + let (full_request, response_recv) = + Requests::AvailabilityFetching(OutgoingRequest::new(peer, self.request)); + + self.sender.send(FromFetchTask::Message( + AllMessages::NetworkBridgeMessage::SendRequests(Vec::from(full_request)), + )).await.map_err(|| TaskError::ShuttingDown)?; + + match response_recv.await { + Ok(resp) => Some(resp), + Err(RequestError::InvalidResponse(err)) => { + }, + Err(RequestError::NetworkError(err)) => { + } + Err(RequestError::Canceled(err)) => { + } + } + Err(PeerError) + } + + fn get_peer_id(index: ValidatorIndex) -> Result { + panic!("TO BE IMPLEMENTED"); + } + + /// Tell subsystem we are done. + async fn conclude(&self, bad_validators: Vec) { + let payload = if bad_validators.is_empty() { + None + } + else { + Some(BadValidators { + session_index: self.session_index, + group_index: self.group_index, + bad_validators, + }) + }; + if let Err(err) = self.sender.send(FromFetchTask::Concluded(payload)).await { + tracing::warn!(LOG_TARGET, err: ?err, "Sending concluded message for task failed"); + } } } diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 0f12ee66173c..043022f31fcc 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -66,6 +66,8 @@ pub struct SessionCache { /// Localized session information, tailored for the needs of availability distribution. pub struct SessionInfo { + /// The index of this session. + pub session_index: SessionIndex, /// Validator groups of the current session. /// /// Each group's order is randomized. This way we achieve load balancing when requesting @@ -73,11 +75,6 @@ pub struct SessionInfo { /// should arrive at a different order, therefore we distribute the load. pub validator_groups: Vec>, - /// All validators of that session. - /// - /// Needed for authority discovery and finding ourselves. - pub validators: Vec, - /// Information about ourself: pub our_index: ValidatorIndex, } diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index f47db8c94efa..c12a94dcb948 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -88,12 +88,6 @@ pub struct ProtocolState { session_cache: SessionCache, } -struct ChunkFetchingInfo { - descriptor: CandidateDescriptor, - /// Validators that backed the candidate and hopefully have our chunk. - backing_group: Vec, -} - impl ProtocolState { /// Update heads that need availability distribution. /// @@ -102,15 +96,18 @@ impl ProtocolState { &mut self, ctx: &mut Context, update: ActiveLeavesUpdate, - ) -> Result<()> { + ) -> Result<()> + where + Context: SubsystemContext, + { let ActiveLeavesUpdate { activated, deactivated, } = update; // Order important! We need to handle activated, prior to deactivated, otherwise we might // cancel still needed jobs. - self.start_requesting_chunks(ctx, activated)?; - let dead_parents = self.stop_requesting_chunks(ctx, deactivated)?; + self.start_requesting_chunks(ctx, activated).await?; + self.stop_requesting_chunks(ctx, deactivated)?; } /// Start requesting chunks for newly imported heads. 
@@ -124,7 +121,7 @@ impl ProtocolState { { for (leaf, _) in new_heads { let cores = query_occupied_cores(ctx, leaf).await?; - self.add_cores(ctx, leaf, cores)?; + self.add_cores(ctx, leaf, cores).await?; } Ok(()) } @@ -133,23 +130,22 @@ impl ProtocolState { /// /// Returns relay_parents which became irrelevant for availability fetching (are not /// referenced by any candidate anymore). - fn stop_requesting_chunks( + fn stop_requesting_chunks( &mut self, - ctx: &mut Context, obsolete_leaves: impl Iterator)>, ) -> Result> { let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().map(|h| h.0).collect(); - let (obsolete_parents, new_fetches): (HashSet<_>, HashMap<_>) = - self.fetches.into_iter().partition_map(|(c_hash, task)| { + let new_fetches = + self.fetches.into_iter().filter_map(|(c_hash, task)| { task.remove_leaves(HashSet::from(obsolete_leaves)); if task.is_finished() { - Either::Left(task.get_relay_parent()) - } else { - Either::Right((c_hash, task)) + Some(task.get_relay_parent()) + } + else { + None } - }); + }).collect(); self.fetches = new_fetches; - obsolete_parents } /// Add candidates corresponding for a particular relay parent. @@ -164,15 +160,22 @@ impl ProtocolState { ctx: &mut Context, leaf: Hash, cores: impl IntoIterator, - ) { + ) + where + Context: SubsystemContext, + { for core in cores { match self.fetches.entry(core.candidate_hash) { Entry::Occupied(e) => // Just book keeping - we are already requesting that chunk: - { - e.relay_parents.insert(leaf) + e.get_mut().add_leaf(leaf), + Entry::Vacant(e) => { + let session_info = self.session_cache.fetch_session_info(ctx, core.candidate_descriptor.relay_parent)?; + if let Some(session_info) = session_info { + e.insert(FetchTask::start(ctx, leaf, core, session_info)) + } + // Not a validator, nothing to do. } - Entry::Vacant(e) => e.insert(FetchTask::start(ctx, leaf, core)), } } } @@ -187,7 +190,7 @@ async fn query_occupied_cores( where Context: SubsystemContext, { - let cores = request_availability_cores_ctx(relay_parent, ctx).await?.await; + let cores = recv_runtime(request_availability_cores_ctx(relay_parent, ctx).await).await?; Ok(cores .into_iter() From 47036c9840ca67c3bafb9d1aacdc50e53c99da74 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Sat, 13 Feb 2021 10:03:53 +0100 Subject: [PATCH 09/60] Format cleanup. --- .../src/fetch_task.rs | 39 +++++++++++-------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index e1c8409db625..2be986a7d5c7 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -196,7 +196,6 @@ impl RunningTask { let bad_validators = Vec::new(); // Try validators in order: for index in self.group { - // Send request: let resp = match do_request(index).await { Ok(resp) => resp, @@ -206,14 +205,14 @@ impl RunningTask { } Err(TaskError::PeerError) => { bad_validators.push(index); - continue + continue; } }; // Data valid? if !self.validate_response(&resp) { bad_validators.push(index); - continue + continue; } // Ok, let's store it and be happy. @@ -226,23 +225,26 @@ impl RunningTask { /// Do request and return response, if successful. /// /// Will also report peer if not successful. 
- async fn do_request(&self, validator: ValidatorIndex) -> std::result::Result { + async fn do_request( + &self, + validator: ValidatorIndex, + ) -> std::result::Result { let peer = self.get_peer_id(index)?; let (full_request, response_recv) = Requests::AvailabilityFetching(OutgoingRequest::new(peer, self.request)); - self.sender.send(FromFetchTask::Message( - AllMessages::NetworkBridgeMessage::SendRequests(Vec::from(full_request)), - )).await.map_err(|| TaskError::ShuttingDown)?; + self.sender + .send(FromFetchTask::Message( + AllMessages::NetworkBridgeMessage::SendRequests(Vec::from(full_request)), + )) + .await + .map_err(|| TaskError::ShuttingDown)?; match response_recv.await { Ok(resp) => Some(resp), - Err(RequestError::InvalidResponse(err)) => { - }, - Err(RequestError::NetworkError(err)) => { - } - Err(RequestError::Canceled(err)) => { - } + Err(RequestError::InvalidResponse(err)) => {} + Err(RequestError::NetworkError(err)) => {} + Err(RequestError::Canceled(err)) => {} } Err(PeerError) } @@ -255,16 +257,19 @@ impl RunningTask { async fn conclude(&self, bad_validators: Vec) { let payload = if bad_validators.is_empty() { None - } - else { + } else { Some(BadValidators { session_index: self.session_index, group_index: self.group_index, bad_validators, }) }; - if let Err(err) = self.sender.send(FromFetchTask::Concluded(payload)).await { - tracing::warn!(LOG_TARGET, err: ?err, "Sending concluded message for task failed"); + if let Err(err) = self.sender.send(FromFetchTask::Concluded(payload)).await { + tracing::warn!( + LOG_TARGET, + err: ?err, + "Sending concluded message for task failed" + ); } } } From 4ad902fd985535b13bc3ffb34b9523f560f4e2f2 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Sat, 13 Feb 2021 10:07:54 +0100 Subject: [PATCH 10/60] More format fixes. --- .../src/fetch_task.rs | 1 + .../availability-distribution/src/state.rs | 33 +++++++++++-------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index 2be986a7d5c7..c2421c0638c4 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -240,6 +240,7 @@ impl RunningTask { .await .map_err(|| TaskError::ShuttingDown)?; + // TODO: Also handle receiver cancel. 
match response_recv.await { Ok(resp) => Some(resp), Err(RequestError::InvalidResponse(err)) => {} diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index c12a94dcb948..2334902af070 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -66,16 +66,18 @@ use jaeger::JaegerSpan; use itertools::{Either, Itertools}; use super::{fetch_task::FetchTask, session_cache::SessionCache, Result, LOG_TARGET}; +use polkadot_node_subsystem_util::request_availability_cores_ctx; use polkadot_primitives::v1::{ BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, OccupiedCore, SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, }; use polkadot_subsystem::{ errors::{ChainApiError, RuntimeApiError}, - jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, - Subsystem, SubsystemContext, SubsystemError, messages::AvailabilityDistributionMessage, + jaeger, + messages::AvailabilityDistributionMessage, + ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, Subsystem, + SubsystemContext, SubsystemError, }; -use polkadot_node_subsystem_util::request_availability_cores_ctx; /// A running instance of this subsystem. pub struct ProtocolState { @@ -96,7 +98,7 @@ impl ProtocolState { &mut self, ctx: &mut Context, update: ActiveLeavesUpdate, - ) -> Result<()> + ) -> Result<()> where Context: SubsystemContext, { @@ -135,16 +137,18 @@ impl ProtocolState { obsolete_leaves: impl Iterator)>, ) -> Result> { let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().map(|h| h.0).collect(); - let new_fetches = - self.fetches.into_iter().filter_map(|(c_hash, task)| { + let new_fetches = self + .fetches + .into_iter() + .filter_map(|(c_hash, task)| { task.remove_leaves(HashSet::from(obsolete_leaves)); if task.is_finished() { Some(task.get_relay_parent()) - } - else { + } else { None } - }).collect(); + }) + .collect(); self.fetches = new_fetches; } @@ -160,17 +164,20 @@ impl ProtocolState { ctx: &mut Context, leaf: Hash, cores: impl IntoIterator, - ) - where + ) where Context: SubsystemContext, { for core in cores { match self.fetches.entry(core.candidate_hash) { Entry::Occupied(e) => // Just book keeping - we are already requesting that chunk: - e.get_mut().add_leaf(leaf), + { + e.get_mut().add_leaf(leaf) + } Entry::Vacant(e) => { - let session_info = self.session_cache.fetch_session_info(ctx, core.candidate_descriptor.relay_parent)?; + let session_info = self + .session_cache + .fetch_session_info(ctx, core.candidate_descriptor.relay_parent)?; if let Some(session_info) = session_info { e.insert(FetchTask::start(ctx, leaf, core, session_info)) } From fee9476e5665887b89e02672aae5da40a4eede6b Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 15 Feb 2021 15:08:06 +0100 Subject: [PATCH 11/60] Almost feature complete `fetch_task`. Missing: - Check for cancel - Actual querying of peer ids. 
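One possible shape for the missing cancel check (sketch only; it assumes the
`kill: oneshot::Receiver<()>` handle handed out by `FetchTask::start` and a
`run_inner` helper containing the actual fetching loop):

    async fn run(self, kill: oneshot::Receiver<()>) {
        // Run the fetch logic, but stop as soon as the owning `FetchTask`
        // drops its sender half, i.e. the task got canceled.
        let run_it = self.run_inner();
        futures::pin_mut!(run_it);
        let _ = futures::future::select(run_it, kill).await;
    }

Querying of peer ids is left untouched by this sketch.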
--- .../availability-distribution/Cargo.toml | 1 + .../src/fetch_task.rs | 146 +++++++++++++----- 2 files changed, 110 insertions(+), 37 deletions(-) diff --git a/node/network/availability-distribution/Cargo.toml b/node/network/availability-distribution/Cargo.toml index 0ac0ed535418..03cd654d6f0a 100644 --- a/node/network/availability-distribution/Cargo.toml +++ b/node/network/availability-distribution/Cargo.toml @@ -28,5 +28,6 @@ sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", sp-keyring = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" } sc-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } +sc-network = { git = "https://github.com/paritytech/substrate", branch = "master" } assert_matches = "1.4.0" maplit = "1.0" diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index c2421c0638c4..22558fea2e5d 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -15,16 +15,22 @@ // along with Polkadot. If not, see . use std::collections::HashSet; +use std::pin::Pin; use std::rc::Rc; +use futures::channel::mpsc; use futures::channel::oneshot; -use v1::AvailabilityFetchingResponse; -use super::{session_cache::SessionInfo, LOG_TARGET}; -use polkadot_node_network_protocol::request_response::v1; +use sc_network::PeerId; + +use polkadot_erasure_coding::branch_hash; +use polkadot_node_network_protocol::request_response::{ + request::{OutgoingRequest, RequestError, Requests}, + v1::{AvailabilityFetchingRequest, AvailabilityFetchingResponse}, +}; use polkadot_primitives::v1::{ - BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, - OccupiedCore, SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, + BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, GroupIndex, Hash, + HashT, OccupiedCore, SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, }; use polkadot_subsystem::messages::{ AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage, @@ -36,6 +42,8 @@ use polkadot_subsystem::{ Subsystem, SubsystemContext, SubsystemError, }; +use super::{session_cache::SessionInfo, LOG_TARGET}; + pub struct FetchTask { /// For what relay parents this task is relevant. /// @@ -54,12 +62,10 @@ pub struct FetchTask { /// State of a particular candidate chunk fetching process. enum FetchedState { - /// Chunk is currently being fetched. + /// Chunk fetch has started. /// /// Once the contained `Sender` is dropped, any still running task will be canceled. - Fetching(oneshot::Sender<()>), - /// Chunk has already been fetched successfully. - Fetched, + Started(oneshot::Sender<()>), /// All relevant live_in have been removed, before we were able to get our chunk. Canceled, } @@ -98,7 +104,7 @@ struct RunningTask { group: Vec, /// The request to send. - request: v1::AvailabilityFetchingRequest, + request: AvailabilityFetchingRequest, /// Root hash, for verifying the chunks validity. erasure_root: Hash, @@ -106,6 +112,9 @@ struct RunningTask { /// Relay parent of the candidate to fetch. relay_parent: Hash, + /// Hash of the candidate we are fetching our chunk for. 
+ candidate_hash: CandidateHash, + /// Sender for communicating with other subsystems and reporting results. sender: mpsc::Sender, @@ -130,12 +139,13 @@ impl FetchTask { session_index: session_info.session_index, group_index: core.group_responsible, group: session_info.validator_groups.get(core.group_responsible).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), - request: v1::AvailabilityFetchingRequest { + request: AvailabilityFetchingRequest { candidate_hash: core.candidate_hash, index: session_info.our_index, }, erasure_root: core.candidate_descriptor.erasure_root, relay_parent: core.candidate_descriptor.relay_parent, + candidate_hash: core.candidate_hash, sender, receiver, }; @@ -143,7 +153,7 @@ impl FetchTask { .await?; FetchTask { live_in: HashSet::from(leaf), - state: FetchedState::Fetching(handle), + state: FetchedState::Started(handle), session: session_info, } } @@ -165,11 +175,11 @@ impl FetchTask { /// Whether or not this task can be considered finished. /// - /// That is, it is either canceled or succeeded fetching the chunk. + /// That is, it is either canceled, succeeded or failed. pub fn is_finished(&self) -> bool { match self.state { - FetchedState::Fetched | FetchedState::Canceled => true, - FetchedState::Fetching => false, + FetchedState::Canceled => true, + FetchedState::Started(sender) => sender.is_canceled(), } } @@ -192,15 +202,22 @@ enum TaskError { type Result = std::result::Result; impl RunningTask { + /// Fetch and store chunk. + /// + /// Try validators in backing group in order. async fn run(self) { let bad_validators = Vec::new(); // Try validators in order: for index in self.group { // Send request: - let resp = match do_request(index).await { + let peer_id = self.get_peer_id(index)?; + let resp = match self.do_request(peer_id).await { Ok(resp) => resp, Err(TaskError::ShuttingDown) => { - tracking::info("Node seems to be shutting down, canceling fetch task"); + tracing::info!( + target: LOG_TARGET, + "Node seems to be shutting down, canceling fetch task" + ); return; } Err(TaskError::PeerError) => { @@ -208,30 +225,30 @@ impl RunningTask { continue; } }; + let chunk = match resp { + AvailabilityFetchingResponse::Chunk(chunk) => chunk, + }; - // Data valid? - if !self.validate_response(&resp) { + // Data genuine? + if !self.validate_chunk(peer_id, &chunk) { bad_validators.push(index); continue; } - // Ok, let's store it and be happy. - store_response(resp); + // Ok, let's store it and be happy: + self.store_chunk(chunk).await; break; } - conclude(bad_validators); + self.conclude(bad_validators); } /// Do request and return response, if successful. - /// - /// Will also report peer if not successful. async fn do_request( &self, - validator: ValidatorIndex, - ) -> std::result::Result { - let peer = self.get_peer_id(index)?; - let (full_request, response_recv) = - Requests::AvailabilityFetching(OutgoingRequest::new(peer, self.request)); + peer: PeerId, + ) -> std::result::Result { + let (full_request, response_recv) = OutgoingRequest::new(peer, self.request); + let requests = Requests::AvailabilityFetching(Vec::from(full_request)); self.sender .send(FromFetchTask::Message( @@ -240,20 +257,75 @@ impl RunningTask { .await .map_err(|| TaskError::ShuttingDown)?; - // TODO: Also handle receiver cancel. 
match response_recv.await { - Ok(resp) => Some(resp), - Err(RequestError::InvalidResponse(err)) => {} - Err(RequestError::NetworkError(err)) => {} - Err(RequestError::Canceled(err)) => {} + Ok(resp) => Ok(resp), + Err(RequestError::InvalidResponse(err)) => { + tracing::warn!( + target: LOG_TARGET, + "Peer sent us invalid erasure chunk data" + ); + Err(TaskError::PeerError) + } + Err(RequestError::NetworkError(err)) => { + tracing::warn!( + target: LOG_TARGET, + "Some network error occurred when fetching erasure chunk" + ); + Err(TaskError::PeerError) + } + Err(RequestError::Canceled(err)) => { + tracing::warn!(target: LOG_TARGET, "Erasure chunk request got canceled"); + Err(TaskError::PeerError) + } + } + } + + fn validate_chunk(&self, peer_id: &PeerId, chunk: &ErasureChunk) -> bool { + let anticipated_hash = + match branch_hash(&self.erasure_root, &chunk.proof, chunk.index as usize) { + Ok(hash) => hash, + Err(e) => { + tracing::trace!( + target: LOG_TARGET, + candidate_hash = ?self.candidate_hash, + origin = ?peer_id, + error = ?e, + "Failed to calculate chunk merkle proof", + ); + return false; + } + }; + let erasure_chunk_hash = BlakeTwo256::hash(&chunk.chunk); + if anticipated_hash != erasure_chunk_hash { + tracing::warn!(target: LOG_TARGET, origin = ?peer_id, "Received chunk does not match merkle tree"); + return false; } - Err(PeerError) + true } fn get_peer_id(index: ValidatorIndex) -> Result { panic!("TO BE IMPLEMENTED"); } + /// Store given chunk and log any error. + async fn store_chunk(&self, chunk: ErasureChunk) { + let (tx, rx) = oneshot::channel(); + self.sender + .send(FromFetchTask::Message(AllMessages::AvailabilityStore( + AvailabilityStoreMessage::StoreChunk { + candidate_hash: self.candidate_hash, + relay_parent: self.relay_parent, + chunk, + tx, + }, + ))) + .await; + + if let Err(oneshot::Canceled) = rx.await { + tracing::error!(target: LOG_TARGET, "Storing erasure chunk failed"); + } + } + /// Tell subsystem we are done. async fn conclude(&self, bad_validators: Vec) { let payload = if bad_validators.is_empty() { @@ -267,8 +339,8 @@ impl RunningTask { }; if let Err(err) = self.sender.send(FromFetchTask::Concluded(payload)).await { tracing::warn!( - LOG_TARGET, - err: ?err, + target: LOG_TARGET, + err= ?err, "Sending concluded message for task failed" ); } From b9aa906dfe8687e0963f77769b66dc55eeeeafb4 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 15 Feb 2021 22:35:57 +0100 Subject: [PATCH 12/60] Finish FetchTask so far. 
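For orientation, this is roughly how `ProtocolState` is expected to drive a
`FetchTask` over its lifetime (illustrative fragment only; `ctx`, `leaf`,
`other_leaf`, `core`, `session_info`, `sender` and `deactivated` stand in for
values provided by the surrounding subsystem code):

    // Start fetching our chunk for a candidate occupying a core:
    let mut task = FetchTask::start(ctx, leaf, core, session_info, sender.clone()).await?;
    // Another active leaf still references the same candidate:
    task.add_leaf(other_leaf);
    // Some leaves got deactivated; the task cancels itself once none are left:
    task.remove_leaves(deactivated);
    if !task.is_live() {
        // Dropping the task drops its kill handle, which stops the background fetch.
    }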
--- .../src/fetch_task.rs | 64 +++++++++++-------- .../src/session_cache.rs | 9 ++- .../availability-distribution/src/state.rs | 51 ++++++++------- 3 files changed, 72 insertions(+), 52 deletions(-) diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index 22558fea2e5d..f482593fb9ea 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -20,6 +20,8 @@ use std::rc::Rc; use futures::channel::mpsc; use futures::channel::oneshot; +use futures::future::select; +use futures::SinkExt; use sc_network::PeerId; @@ -39,7 +41,7 @@ use polkadot_subsystem::messages::{ use polkadot_subsystem::{ errors::{ChainApiError, RuntimeApiError}, jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, - Subsystem, SubsystemContext, SubsystemError, + Subsystem, SubsystemContext, SubsystemError, SubsystemResult }; use super::{session_cache::SessionInfo, LOG_TARGET}; @@ -117,9 +119,6 @@ struct RunningTask { /// Sender for communicating with other subsystems and reporting results. sender: mpsc::Sender, - - /// Receive `Canceled` errors here. - receiver: oneshot::Receiver<()>, } impl FetchTask { @@ -130,15 +129,15 @@ impl FetchTask { core: OccupiedCore, session_info: Rc, sender: mpsc::Sender, - ) -> Self + ) -> SubsystemResult where Context: SubsystemContext, { - let (handle, receiver) = oneshot::channel(); + let (handle, kill) = oneshot::channel(); let running = RunningTask { session_index: session_info.session_index, group_index: core.group_responsible, - group: session_info.validator_groups.get(core.group_responsible).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), + group: session_info.validator_groups.get(core.group_responsible.into() as usize).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), request: AvailabilityFetchingRequest { candidate_hash: core.candidate_hash, index: session_info.our_index, @@ -147,15 +146,14 @@ impl FetchTask { relay_parent: core.candidate_descriptor.relay_parent, candidate_hash: core.candidate_hash, sender, - receiver, }; - ctx.spawn("chunk-fetcher", Pin::new(Box::new(running.run()))) + ctx.spawn("chunk-fetcher", Pin::new(Box::new(running.run(kill)))) .await?; - FetchTask { - live_in: HashSet::from(leaf), + Ok(FetchTask { + live_in: vec![leaf].into_iter().collect(), state: FetchedState::Started(handle), session: session_info, - } + }) } /// Add the given leaf to the relay parents which are making this task relevant. @@ -166,9 +164,8 @@ impl FetchTask { /// Remove leaves and cancel the task, if it was the last one and the task has still been /// fetching. pub fn remove_leaves(&mut self, leaves: HashSet) { - self.live_in.difference(leaves); + self.live_in.difference(&leaves); if self.live_in.is_empty() { - // TODO: Make sure, to actually cancel the task. self.state = FetchedState::Canceled } } @@ -183,9 +180,10 @@ impl FetchTask { } } - /// Retrieve the relay parent providing the context for this candidate. - pub fn get_relay_parent(&self) -> Hash { - self.descriptor.relay_parent + /// Whether or not there are still relay parents around with this candidate pending + /// availability. 
+ pub fn is_live(&self) -> bool { + !self.live_in.is_empty() } } @@ -202,15 +200,31 @@ enum TaskError { type Result = std::result::Result; impl RunningTask { + async fn run(self, kill: oneshot::Receiver<()>) { + // Wait for completion/or cancel. + let _ = select(self.run_inner(), kill); + } + /// Fetch and store chunk. /// /// Try validators in backing group in order. - async fn run(self) { + async fn run_inner(self) { let bad_validators = Vec::new(); // Try validators in order: for index in self.group { // Send request: - let peer_id = self.get_peer_id(index)?; + let peer_id = match self.get_peer_id(index).await { + Ok(peer_id) => peer_id, + Err(err) => { + tracing::warn!( + target: LOG_TARGET, + validator_index = ?index, + "Discoverying peer id for validator failed" + ); + bad_validators.push(index); + continue + } + }; let resp = match self.do_request(peer_id).await { Ok(resp) => resp, Err(TaskError::ShuttingDown) => { @@ -248,14 +262,14 @@ impl RunningTask { peer: PeerId, ) -> std::result::Result { let (full_request, response_recv) = OutgoingRequest::new(peer, self.request); - let requests = Requests::AvailabilityFetching(Vec::from(full_request)); + let requests = Requests::AvailabilityFetching(full_request); self.sender .send(FromFetchTask::Message( - AllMessages::NetworkBridgeMessage::SendRequests(Vec::from(full_request)), + AllMessages::NetworkBridge(NetworkBridgeMessage::SendRequests(vec![requests])), )) .await - .map_err(|| TaskError::ShuttingDown)?; + .map_err(|_| TaskError::ShuttingDown)?; match response_recv.await { Ok(resp) => Ok(resp), @@ -303,12 +317,12 @@ impl RunningTask { true } - fn get_peer_id(index: ValidatorIndex) -> Result { + fn get_peer_id(&self, index: ValidatorIndex) -> Result { panic!("TO BE IMPLEMENTED"); } /// Store given chunk and log any error. - async fn store_chunk(&self, chunk: ErasureChunk) { + async fn store_chunk(&mut self, chunk: ErasureChunk) { let (tx, rx) = oneshot::channel(); self.sender .send(FromFetchTask::Message(AllMessages::AvailabilityStore( @@ -327,7 +341,7 @@ impl RunningTask { } /// Tell subsystem we are done. - async fn conclude(&self, bad_validators: Vec) { + async fn conclude(&mut self, bad_validators: Vec) { let payload = if bad_validators.is_empty() { None } else { diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 043022f31fcc..a0957c6c3916 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -80,6 +80,13 @@ pub struct SessionInfo { } impl SessionCache { + pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { + SessionCache { + by_relay_parent: HashMap::new(), + by_session_index: HashMap::new(), + keystore, + } + } /// Retrieve session info for the given relay parent. /// /// This function will query the cache first and will only query the runtime on cache miss. 
@@ -119,8 +126,8 @@ impl SessionCache { let info = Rc::new(SessionInfo { validator_groups, - validators, our_index, + session_index, }); let downgraded = Rc::downgrade(&info); self.by_relay_parent.insert(parent, downgraded.clone()); diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index 2334902af070..3668ae6310c4 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -65,7 +65,6 @@ use jaeger::JaegerSpan; use itertools::{Either, Itertools}; -use super::{fetch_task::FetchTask, session_cache::SessionCache, Result, LOG_TARGET}; use polkadot_node_subsystem_util::request_availability_cores_ctx; use polkadot_primitives::v1::{ BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, @@ -79,6 +78,8 @@ use polkadot_subsystem::{ SubsystemContext, SubsystemError, }; +use super::{fetch_task::FetchTask, session_cache::SessionCache, Result, LOG_TARGET, error::recv_runtime}; + /// A running instance of this subsystem. pub struct ProtocolState { /// Candidates we need to fetch our chunk for. @@ -91,10 +92,16 @@ pub struct ProtocolState { } impl ProtocolState { + pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { + ProtocolState { + fetches: HashMap::new(), + session_cache: SessionCache::new(keystore), + } + } /// Update heads that need availability distribution. /// /// For all active heads we will be fetching our chunk for availabilty distribution. - pub(crate) fn update_fetching_heads( + pub(crate) async fn update_fetching_heads( &mut self, ctx: &mut Context, update: ActiveLeavesUpdate, @@ -108,8 +115,9 @@ impl ProtocolState { } = update; // Order important! We need to handle activated, prior to deactivated, otherwise we might // cancel still needed jobs. - self.start_requesting_chunks(ctx, activated).await?; - self.stop_requesting_chunks(ctx, deactivated)?; + self.start_requesting_chunks(ctx, activated.into_iter()).await?; + self.stop_requesting_chunks(deactivated.into_iter()); + Ok(()) } /// Start requesting chunks for newly imported heads. @@ -119,7 +127,7 @@ impl ProtocolState { new_heads: impl Iterator)>, ) -> Result<()> where - Context: SubsystemContext + Sync + Send, + Context: SubsystemContext, { for (leaf, _) in new_heads { let cores = query_occupied_cores(ctx, leaf).await?; @@ -134,22 +142,13 @@ impl ProtocolState { /// referenced by any candidate anymore). fn stop_requesting_chunks( &mut self, - obsolete_leaves: impl Iterator)>, - ) -> Result> { + obsolete_leaves: impl Iterator, + ) { let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().map(|h| h.0).collect(); - let new_fetches = self - .fetches - .into_iter() - .filter_map(|(c_hash, task)| { - task.remove_leaves(HashSet::from(obsolete_leaves)); - if task.is_finished() { - Some(task.get_relay_parent()) - } else { - None - } - }) - .collect(); - self.fetches = new_fetches; + self.fetches.retain(|&c_hash, task| { + task.remove_leaves(obsolete_leaves); + task.is_live() + }) } /// Add candidates corresponding for a particular relay parent. @@ -159,25 +158,24 @@ impl ProtocolState { /// Note: The passed in `leaf` is not the same as CandidateDescriptor::relay_parent in the /// given cores. The latter is the relay_parent this candidate considers its parent, while the /// passed in leaf might be some later block where the candidate is still pending availability. 
- fn add_cores( + async fn add_cores( &mut self, ctx: &mut Context, leaf: Hash, cores: impl IntoIterator, - ) where + ) -> Result<()> + where Context: SubsystemContext, { for core in cores { match self.fetches.entry(core.candidate_hash) { Entry::Occupied(e) => // Just book keeping - we are already requesting that chunk: - { - e.get_mut().add_leaf(leaf) - } + e.get_mut().add_leaf(leaf), Entry::Vacant(e) => { let session_info = self .session_cache - .fetch_session_info(ctx, core.candidate_descriptor.relay_parent)?; + .fetch_session_info(ctx, core.candidate_descriptor.relay_parent).await?; if let Some(session_info) = session_info { e.insert(FetchTask::start(ctx, leaf, core, session_info)) } @@ -185,6 +183,7 @@ impl ProtocolState { } } } + Ok(()) } } From a65562f5b067ee9e91e6e01dcc14f259b09f3f23 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 16 Feb 2021 16:01:21 +0100 Subject: [PATCH 13/60] Directly use AuthorityDiscoveryId in protocol and cache. --- .../availability-distribution/src/error.rs | 5 + .../src/session_cache.rs | 94 +++++++++++++------ .../protocol/src/request_response/request.rs | 6 +- 3 files changed, 76 insertions(+), 29 deletions(-) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index 0e29294b2c0c..22f363f6584f 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -20,6 +20,7 @@ use thiserror::Error; use futures::channel::oneshot; use polkadot_node_subsystem_util::Error as UtilError; +use polkadot_primitives::v1::SessionIndex; use polkadot_subsystem::{ errors::{ChainApiError, RuntimeApiError}, SubsystemError, @@ -50,6 +51,10 @@ pub enum Error { /// Some request to the runtime failed. #[error("Runtime request failed")] RuntimeRequest(#[source] RuntimeApiError), + + /// We tried fetching a session which was not available. + #[error("No such session")] + NoSuchSession(SessionIndex), } pub type Result = std::result::Result; diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index a0957c6c3916..f31984f62479 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -24,11 +24,12 @@ use sp_core::crypto::Public; use sp_keystore::{CryptoStore, SyncCryptoStorePtr}; use polkadot_node_subsystem_util::{ - request_session_index_for_child_ctx, request_validator_groups_ctx, request_validators_ctx, + request_session_index_for_child_ctx, request_session_info_ctx, }; +use polkadot_primitives::v1::SessionInfo as GlobalSessionInfo; use polkadot_primitives::v1::{ BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, - SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, + SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, AuthorityDiscoveryId }; use polkadot_subsystem::{ jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, @@ -73,10 +74,14 @@ pub struct SessionInfo { /// Each group's order is randomized. This way we achieve load balancing when requesting /// chunks, as the validators in a group will be tried in that randomized order. Each node /// should arrive at a different order, therefore we distribute the load. 
- pub validator_groups: Vec>, + pub validator_groups: Vec>, /// Information about ourself: pub our_index: ValidatorIndex, + //// Remember to which group we blong, so we won't start fetching chunks for candidates we + //// backed our selves. + // TODO: Implement this: + // pub our_group: GroupIndex, } impl SessionCache { @@ -114,24 +119,14 @@ impl SessionCache { // session info matches way longer than necessary (for an entire session), but the overhead // should be low enough to not matter. self.bury_dead(); - if let Some((our_index, validators)) = self.query_validator_info(ctx, parent).await? { - let (mut validator_groups, _) = - recv_runtime(request_validator_groups_ctx(parent, ctx).await).await?; - // Shuffle validators in groups: - let mut rng = thread_rng(); - for g in validator_groups.iter_mut() { - g.shuffle(&mut rng) - } - - let info = Rc::new(SessionInfo { - validator_groups, - our_index, - session_index, - }); - let downgraded = Rc::downgrade(&info); - self.by_relay_parent.insert(parent, downgraded.clone()); - self.by_session_index.insert(session_index, downgraded); + if let Some(info) = self + .query_info_from_runtime(ctx, parent, session_index) + .await? + { + self.by_relay_parent.insert(parent, Rc::downgrade(&info)); + self.by_session_index + .insert(session_index, Rc::downgrade(&info)); return Ok(Some(info)); } Ok(None) @@ -152,26 +147,71 @@ impl SessionCache { upgrade_report_dead(weak_ref) } - /// Get our validator id and the validators in the current session. + /// Query needed information from runtime. /// - /// Returns: Ok(None) if we are not a validator. - async fn query_validator_info( + /// We need to pass in the relay parent for our call to `request_session_info_ctx`. We should + /// actually don't need that, I suppose it is used for internal caching based on relay parents, + /// which we don't use here. It should not do any harm though. + async fn query_info_from_runtime( &self, ctx: &mut Context, parent: Hash, - ) -> Result)>> + session_index: SessionIndex, + ) -> Result>> where Context: SubsystemContext, { - let validators = recv_runtime(request_validators_ctx(parent, ctx).await).await?; + let GlobalSessionInfo { + validators, + discovery_keys, + mut validator_groups, + .. + } = recv_runtime(request_session_info_ctx(parent, session_index, ctx).await) + .await? + .ok_or(Error::NoSuchSession(session_index))?; + + if let Some(our_index) = self.get_our_index(validators).await { + // Shuffle validators in groups: + let mut rng = thread_rng(); + for g in validator_groups.iter_mut() { + g.shuffle(&mut rng) + } + // Look up `AuthorityDiscoveryId`s right away: + let validator_groups: Vec> = validator_groups + .into_iter() + .map(|group| { + group + .into_iter() + .map(|index| { + discovery_keys.get(Into::into(index) as usize) + .expect("There should be a discovery key for each validator of each validator group. qed.").clone() + }) + .collect() + }) + .collect(); + + let info = Rc::new(SessionInfo { + validator_groups, + our_index, + session_index, + }); + return Ok(Some(info)); + } + return Ok(None); + } + + /// Get our validator id and the validators in the current session. + /// + /// Returns: Ok(None) if we are not a validator. 
+ async fn get_our_index(&self, validators: Vec) -> Option { for (i, v) in validators.iter().enumerate() { if CryptoStore::has_keys(&*self.keystore, &[(v.to_raw_vec(), ValidatorId::ID)]) .await { - return Ok(Some((ValidatorIndex(i as u32), validators))); + return Some(ValidatorIndex(i as u32)); } } - Ok(None) + None } /// Get rid of the dead bodies from time to time. diff --git a/node/network/protocol/src/request_response/request.rs b/node/network/protocol/src/request_response/request.rs index a37ff8d2eaa1..2f086510177d 100644 --- a/node/network/protocol/src/request_response/request.rs +++ b/node/network/protocol/src/request_response/request.rs @@ -22,6 +22,8 @@ use sc_network as network; use sc_network::config as netconfig; use sc_network::PeerId; +use polkadot_primitives::v1::AuthorityDiscoveryId; + use super::{v1, Protocol}; /// Common properties of any `Request`. @@ -69,7 +71,7 @@ impl Requests { #[derive(Debug)] pub struct OutgoingRequest { /// Intendent recipient of this request. - pub peer: PeerId, + pub peer: AuthorityDiscoveryId, /// The actual request to send over the wire. pub payload: Req, /// Sender which is used by networking to get us back a response. @@ -98,7 +100,7 @@ where /// It will contain a sender that is used by the networking for sending back responses. The /// connected receiver is returned as the second element in the returned tuple. pub fn new( - peer: PeerId, + peer: AuthorityDiscoveryId, payload: Req, ) -> ( Self, From 4a4356108fd4a887a51ababe19be4bb0c729b822 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 16 Feb 2021 20:44:51 +0100 Subject: [PATCH 14/60] Resolve `AuthorityDiscoveryId` on sending requests. --- node/network/bridge/src/lib.rs | 11 +++++-- node/network/bridge/src/network.rs | 50 +++++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/node/network/bridge/src/lib.rs b/node/network/bridge/src/lib.rs index 030a3c5b8d11..67565e685fba 100644 --- a/node/network/bridge/src/lib.rs +++ b/node/network/bridge/src/lib.rs @@ -118,7 +118,7 @@ impl NetworkBridge { impl Subsystem for NetworkBridge where - Net: Network + validator_discovery::Network, + Net: Network + validator_discovery::Network + Sync, AD: validator_discovery::AuthorityDiscovery, Context: SubsystemContext, { @@ -238,7 +238,10 @@ where Action::SendRequests(reqs) => { for req in reqs { - bridge.network_service.start_request(req); + bridge + .network_service + .start_request(&mut bridge.authority_discovery_service, req) + .await; } }, @@ -615,6 +618,7 @@ mod tests { use polkadot_node_network_protocol::{ObservedRole, request_response::request::Requests}; use crate::network::{Network, NetworkAction}; + use crate::validator_discovery::AuthorityDiscovery; // The subsystem's view of the network - only supports a single call to `event_stream`. 
struct TestNetwork { @@ -654,6 +658,7 @@ mod tests { ) } + #[async_trait] impl Network for TestNetwork { fn event_stream(&mut self) -> BoxStream<'static, NetworkEvent> { self.net_events.lock() @@ -668,7 +673,7 @@ mod tests { Box::pin((&mut self.action_tx).sink_map_err(Into::into)) } - fn start_request(&self, _: Requests) { + async fn start_request(&self, _: &mut AD, _: Requests) { } } diff --git a/node/network/bridge/src/network.rs b/node/network/bridge/src/network.rs index 77c58e120616..ad8f85d81d55 100644 --- a/node/network/bridge/src/network.rs +++ b/node/network/bridge/src/network.rs @@ -17,6 +17,7 @@ use std::pin::Pin; use std::sync::Arc; +use async_trait::async_trait; use futures::future::BoxFuture; use futures::prelude::*; use futures::stream::BoxStream; @@ -24,7 +25,7 @@ use futures::stream::BoxStream; use parity_scale_codec::Encode; use sc_network::Event as NetworkEvent; -use sc_network::{NetworkService, IfDisconnected}; +use sc_network::{IfDisconnected, NetworkService, OutboundFailure, RequestFailure}; use polkadot_node_network_protocol::{ peer_set::PeerSet, @@ -34,6 +35,8 @@ use polkadot_node_network_protocol::{ use polkadot_primitives::v1::{Block, Hash}; use polkadot_subsystem::{SubsystemError, SubsystemResult}; +use crate::validator_discovery::{peer_id_from_multiaddr, AuthorityDiscovery}; + use super::LOG_TARGET; /// Send a message to the network. @@ -92,6 +95,7 @@ pub enum NetworkAction { } /// An abstraction over networking for the purposes of this subsystem. +#[async_trait] pub trait Network: Send + 'static { /// Get a stream of all events occurring on the network. This may include events unrelated /// to the Polkadot protocol - the user of this function should filter only for events related @@ -105,7 +109,11 @@ pub trait Network: Send + 'static { ) -> Pin + Send + 'a>>; /// Send a request to a remote peer. - fn start_request(&self, req: Requests); + async fn start_request( + &self, + authority_discovery: &mut AD, + req: Requests, + ); /// Report a given peer as either beneficial (+) or costly (-) according to the given scalar. fn report_peer( @@ -137,6 +145,7 @@ pub trait Network: Send + 'static { } } +#[async_trait] impl Network for Arc> { fn event_stream(&mut self) -> BoxStream<'static, NetworkEvent> { NetworkService::event_stream(self, "polkadot-network-bridge").boxed() @@ -189,7 +198,11 @@ impl Network for Arc> { Box::pin(ActionSink(&**self)) } - fn start_request(&self, req: Requests) { + async fn start_request( + &self, + authority_discovery: &mut AD, + req: Requests, + ) { let ( protocol, OutgoingRequest { @@ -199,8 +212,35 @@ impl Network for Arc> { }, ) = req.encode_request(); - NetworkService::start_request(&*self, - peer, + let peer_id = authority_discovery + .get_addresses_by_authority_id(peer) + .await + .and_then(|addrs| { + addrs + .into_iter() + .find_map(|addr| peer_id_from_multiaddr(&addr)) + }); + + let peer_id = match peer_id { + None => { + tracing::debug!(target: LOG_TARGET, "Discovering authority failed"); + match pending_response + .send(Err(RequestFailure::Network(OutboundFailure::DialFailure))) + { + Err(_) => tracing::debug!( + target: LOG_TARGET, + "Sending failed request response failed." 
+ ), + Ok(_) => {} + } + return; + } + Some(peer_id) => peer_id, + }; + + NetworkService::start_request( + &*self, + peer_id, protocol.into_protocol_name(), payload, pending_response, From 6543b303ab92748a18aa17b56e298d3fa6ebb1fc Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Wed, 17 Feb 2021 15:48:27 +0100 Subject: [PATCH 15/60] Rework fetch_task - also make it impossible to check the wrong chunk index. - Export needed function in validator_discovery. --- .../src/fetch_task.rs | 90 +++++++++---------- .../network/bridge/src/validator_discovery.rs | 2 +- .../protocol/src/request_response/v1.rs | 41 +++++++-- 3 files changed, 77 insertions(+), 56 deletions(-) diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index f482593fb9ea..da8c708ea421 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -23,16 +23,15 @@ use futures::channel::oneshot; use futures::future::select; use futures::SinkExt; -use sc_network::PeerId; - use polkadot_erasure_coding::branch_hash; use polkadot_node_network_protocol::request_response::{ request::{OutgoingRequest, RequestError, Requests}, v1::{AvailabilityFetchingRequest, AvailabilityFetchingResponse}, }; use polkadot_primitives::v1::{ - BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, GroupIndex, Hash, - HashT, OccupiedCore, SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, + AuthorityDiscoveryId, BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, + ErasureChunk, GroupIndex, Hash, HashT, OccupiedCore, SessionIndex, ValidatorId, + ValidatorIndex, PARACHAIN_KEY_TYPE_ID, }; use polkadot_subsystem::messages::{ AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage, @@ -41,7 +40,7 @@ use polkadot_subsystem::messages::{ use polkadot_subsystem::{ errors::{ChainApiError, RuntimeApiError}, jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, - Subsystem, SubsystemContext, SubsystemError, SubsystemResult + Subsystem, SubsystemContext, SubsystemError, SubsystemResult, }; use super::{session_cache::SessionInfo, LOG_TARGET}; @@ -91,7 +90,7 @@ pub struct BadValidators { /// The group the not properly responding validators are. pub group_index: GroupIndex, /// The indeces of the bad validators. - pub bad_validators: Vec, + pub bad_validators: Vec, } /// Information a running task needs. @@ -103,7 +102,9 @@ struct RunningTask { group_index: GroupIndex, /// Validators to request the chunk from. - group: Vec, + /// + /// This vector gets drained during execution of the task (it will be empty afterwards). + group: Vec, /// The request to send. request: AvailabilityFetchingRequest, @@ -114,9 +115,6 @@ struct RunningTask { /// Relay parent of the candidate to fetch. relay_parent: Hash, - /// Hash of the candidate we are fetching our chunk for. - candidate_hash: CandidateHash, - /// Sender for communicating with other subsystems and reporting results. 
sender: mpsc::Sender, } @@ -144,10 +142,9 @@ impl FetchTask { }, erasure_root: core.candidate_descriptor.erasure_root, relay_parent: core.candidate_descriptor.relay_parent, - candidate_hash: core.candidate_hash, sender, }; - ctx.spawn("chunk-fetcher", Pin::new(Box::new(running.run(kill)))) + ctx.spawn("chunk-fetcher", running.run(kill).boxed()) .await?; Ok(FetchTask { live_in: vec![leaf].into_iter().collect(), @@ -202,30 +199,20 @@ type Result = std::result::Result; impl RunningTask { async fn run(self, kill: oneshot::Receiver<()>) { // Wait for completion/or cancel. - let _ = select(self.run_inner(), kill); + let run_it = self.run_inner(); + futures::pin_mut!(run_it); + let _ = select(run_it, kill).await; } /// Fetch and store chunk. /// /// Try validators in backing group in order. - async fn run_inner(self) { - let bad_validators = Vec::new(); + async fn run_inner(mut self) { + let mut bad_validators = Vec::new(); // Try validators in order: - for index in self.group { + while let Some(validator)= self.group.pop() { // Send request: - let peer_id = match self.get_peer_id(index).await { - Ok(peer_id) => peer_id, - Err(err) => { - tracing::warn!( - target: LOG_TARGET, - validator_index = ?index, - "Discoverying peer id for validator failed" - ); - bad_validators.push(index); - continue - } - }; - let resp = match self.do_request(peer_id).await { + let resp = match self.do_request(&validator).await { Ok(resp) => resp, Err(TaskError::ShuttingDown) => { tracing::info!( @@ -235,17 +222,19 @@ impl RunningTask { return; } Err(TaskError::PeerError) => { - bad_validators.push(index); + bad_validators.push(validator); continue; } }; let chunk = match resp { - AvailabilityFetchingResponse::Chunk(chunk) => chunk, + AvailabilityFetchingResponse::Chunk(resp) => { + resp.reconstruct_erasure_chunk(&self.request) + } }; // Data genuine? - if !self.validate_chunk(peer_id, &chunk) { - bad_validators.push(index); + if !self.validate_chunk(&validator, &chunk) { + bad_validators.push(validator); continue; } @@ -258,16 +247,17 @@ impl RunningTask { /// Do request and return response, if successful. 
async fn do_request( - &self, - peer: PeerId, + &mut self, + validator: &AuthorityDiscoveryId, ) -> std::result::Result { - let (full_request, response_recv) = OutgoingRequest::new(peer, self.request); + let (full_request, response_recv) = + OutgoingRequest::new(validator.clone(), self.request); let requests = Requests::AvailabilityFetching(full_request); self.sender - .send(FromFetchTask::Message( - AllMessages::NetworkBridge(NetworkBridgeMessage::SendRequests(vec![requests])), - )) + .send(FromFetchTask::Message(AllMessages::NetworkBridge( + NetworkBridgeMessage::SendRequests(vec![requests]), + ))) .await .map_err(|_| TaskError::ShuttingDown)?; @@ -276,6 +266,7 @@ impl RunningTask { Err(RequestError::InvalidResponse(err)) => { tracing::warn!( target: LOG_TARGET, + origin= ?validator, "Peer sent us invalid erasure chunk data" ); Err(TaskError::PeerError) @@ -283,26 +274,29 @@ impl RunningTask { Err(RequestError::NetworkError(err)) => { tracing::warn!( target: LOG_TARGET, + origin= ?validator, "Some network error occurred when fetching erasure chunk" ); Err(TaskError::PeerError) } Err(RequestError::Canceled(err)) => { - tracing::warn!(target: LOG_TARGET, "Erasure chunk request got canceled"); + tracing::warn!(target: LOG_TARGET, + origin= ?validator, + "Erasure chunk request got canceled"); Err(TaskError::PeerError) } } } - fn validate_chunk(&self, peer_id: &PeerId, chunk: &ErasureChunk) -> bool { + fn validate_chunk(&self, validator: &AuthorityDiscoveryId, chunk: &ErasureChunk) -> bool { let anticipated_hash = match branch_hash(&self.erasure_root, &chunk.proof, chunk.index as usize) { Ok(hash) => hash, Err(e) => { tracing::trace!( target: LOG_TARGET, - candidate_hash = ?self.candidate_hash, - origin = ?peer_id, + candidate_hash = ?self.request.candidate_hash, + origin = ?validator, error = ?e, "Failed to calculate chunk merkle proof", ); @@ -311,23 +305,19 @@ impl RunningTask { }; let erasure_chunk_hash = BlakeTwo256::hash(&chunk.chunk); if anticipated_hash != erasure_chunk_hash { - tracing::warn!(target: LOG_TARGET, origin = ?peer_id, "Received chunk does not match merkle tree"); + tracing::warn!(target: LOG_TARGET, origin = ?validator, "Received chunk does not match merkle tree"); return false; } true } - fn get_peer_id(&self, index: ValidatorIndex) -> Result { - panic!("TO BE IMPLEMENTED"); - } - /// Store given chunk and log any error. async fn store_chunk(&mut self, chunk: ErasureChunk) { let (tx, rx) = oneshot::channel(); self.sender .send(FromFetchTask::Message(AllMessages::AvailabilityStore( AvailabilityStoreMessage::StoreChunk { - candidate_hash: self.candidate_hash, + candidate_hash: self.request.candidate_hash, relay_parent: self.relay_parent, chunk, tx, @@ -341,7 +331,7 @@ impl RunningTask { } /// Tell subsystem we are done. 
- async fn conclude(&mut self, bad_validators: Vec) { + async fn conclude(&mut self, bad_validators: Vec) { let payload = if bad_validators.is_empty() { None } else { diff --git a/node/network/bridge/src/validator_discovery.rs b/node/network/bridge/src/validator_discovery.rs index 926aa3706649..06fb5b65bdb8 100644 --- a/node/network/bridge/src/validator_discovery.rs +++ b/node/network/bridge/src/validator_discovery.rs @@ -126,7 +126,7 @@ fn on_revoke(map: &mut HashMap, id: AuthorityDiscover None } -fn peer_id_from_multiaddr(addr: &Multiaddr) -> Option { +pub(crate) fn peer_id_from_multiaddr(addr: &Multiaddr) -> Option { addr.iter().last().and_then(|protocol| if let Protocol::P2p(multihash) = protocol { PeerId::from_multihash(multihash).ok() } else { diff --git a/node/network/protocol/src/request_response/v1.rs b/node/network/protocol/src/request_response/v1.rs index 04a7865bfc44..53e58f6b48b1 100644 --- a/node/network/protocol/src/request_response/v1.rs +++ b/node/network/protocol/src/request_response/v1.rs @@ -24,18 +24,49 @@ use super::request::IsRequest; use super::Protocol; /// Request an availability chunk. -#[derive(Debug, Clone, Encode, Decode)] +#[derive(Debug, Copy, Clone, Encode, Decode)] pub struct AvailabilityFetchingRequest { - candidate_hash: CandidateHash, - index: ValidatorIndex, + pub candidate_hash: CandidateHash, + pub index: ValidatorIndex, } /// Receive a rqeuested erasure chunk. #[derive(Debug, Clone, Encode, Decode)] pub enum AvailabilityFetchingResponse { - /// The requested chunk. + /// The requested chunk data. #[codec(index = 0)] - Chunk(ErasureChunk), + Chunk(ChunkResponse), +} + +/// Skimmed down variant of `ErasureChunk`. +/// +/// Instead of transmitting a full `ErasureChunk` we transmit `ChunkResponse` in +/// `AvailabilityFetchingResponse`, which omits the chunk's index. The index is already known by +/// the requester and by not transmitting it, we ensure the requester is going to use his index +/// value for validating the response, thus making sure he got what he requested. +#[derive(Debug, Clone, Encode, Decode)] +pub struct ChunkResponse { + /// The erasure-encoded chunk of data belonging to the candidate block. + pub chunk: Vec, + /// Proof for this chunk's branch in the Merkle tree. + pub proof: Vec>, +} + +impl From for ChunkResponse { + fn from(ErasureChunk {chunk, index: _, proof}: ErasureChunk) -> Self { + ChunkResponse { chunk, proof} + } +} + +impl ChunkResponse { + /// Re-build an `ErasureChunk` from response and request. + pub fn reconstruct_erasure_chunk(self, req: &AvailabilityFetchingRequest) -> ErasureChunk { + ErasureChunk { + chunk: self.chunk, + proof: self.proof, + index: req.index.0, + } + } } impl IsRequest for AvailabilityFetchingRequest { From 256e559fbea7e5b42745dc4139cbdd3dbbffd917 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Wed, 17 Feb 2021 15:49:16 +0100 Subject: [PATCH 16/60] From implementation for `ValidatorIndex`. --- primitives/src/v0.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/primitives/src/v0.rs b/primitives/src/v0.rs index 8c6b4f538a54..ecb9f9cf3e53 100644 --- a/primitives/src/v0.rs +++ b/primitives/src/v0.rs @@ -119,6 +119,13 @@ impl MallocSizeOf for ValidatorId { #[derive(Debug, MallocSizeOf)] pub struct ValidatorIndex(pub u32); +// We should really get https://github.com/paritytech/polkadot/issues/2403 going .. +impl From for ValidatorIndex { + fn from(n: u32) -> Self { + ValidatorIndex(n) + } +} + application_crypto::with_pair! { /// A Parachain validator keypair. 
pub type ValidatorPair = validator_app::Pair; From f8d5fef85313da4041b7b4f571d3fbad91f0c622 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Wed, 17 Feb 2021 22:02:12 +0100 Subject: [PATCH 17/60] Fixes and more integration work. --- .../availability-distribution/src/error.rs | 4 ++ .../src/fetch_task.rs | 25 +++----- .../availability-distribution/src/lib.rs | 11 ++-- .../src/session_cache.rs | 17 ++++- .../availability-distribution/src/state.rs | 63 ++++++++++++++----- 5 files changed, 81 insertions(+), 39 deletions(-) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index 22f363f6584f..c507d5835604 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -55,6 +55,10 @@ pub enum Error { /// We tried fetching a session which was not available. #[error("No such session")] NoSuchSession(SessionIndex), + + /// Spawning a running task failed. + #[error("Spawning subsystem task failed")] + SpawnTask(#[source] SubsystemError), } pub type Result = std::result::Result; diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index da8c708ea421..a73edc50a00f 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -21,7 +21,7 @@ use std::rc::Rc; use futures::channel::mpsc; use futures::channel::oneshot; use futures::future::select; -use futures::SinkExt; +use futures::{SinkExt, FutureExt}; use polkadot_erasure_coding::branch_hash; use polkadot_node_network_protocol::request_response::{ @@ -43,7 +43,7 @@ use polkadot_subsystem::{ Subsystem, SubsystemContext, SubsystemError, SubsystemResult, }; -use super::{session_cache::SessionInfo, LOG_TARGET}; +use super::{session_cache::{SessionInfo, BadValidators}, LOG_TARGET, error::{Error, Result}}; pub struct FetchTask { /// For what relay parents this task is relevant. @@ -83,16 +83,6 @@ pub enum FromFetchTask { Concluded(Option), } -/// Report of bad validators. -pub struct BadValidators { - /// The session index that was used. - pub session_index: SessionIndex, - /// The group the not properly responding validators are. - pub group_index: GroupIndex, - /// The indeces of the bad validators. - pub bad_validators: Vec, -} - /// Information a running task needs. struct RunningTask { /// For what session we have been spawned. @@ -127,7 +117,7 @@ impl FetchTask { core: OccupiedCore, session_info: Rc, sender: mpsc::Sender, - ) -> SubsystemResult + ) -> Result where Context: SubsystemContext, { @@ -135,7 +125,7 @@ impl FetchTask { let running = RunningTask { session_index: session_info.session_index, group_index: core.group_responsible, - group: session_info.validator_groups.get(core.group_responsible.into() as usize).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), + group: session_info.validator_groups.get(core.group_responsible.0 as usize).expect("The responsible group of a candidate should be available in the corresponding session. 
qed.").clone(), request: AvailabilityFetchingRequest { candidate_hash: core.candidate_hash, index: session_info.our_index, @@ -145,7 +135,8 @@ impl FetchTask { sender, }; ctx.spawn("chunk-fetcher", running.run(kill).boxed()) - .await?; + .await + .map_err(|e| Error::SpawnTask(e))?; Ok(FetchTask { live_in: vec![leaf].into_iter().collect(), state: FetchedState::Started(handle), @@ -194,8 +185,6 @@ enum TaskError { ShuttingDown, } -type Result = std::result::Result; - impl RunningTask { async fn run(self, kill: oneshot::Receiver<()>) { // Wait for completion/or cancel. @@ -210,7 +199,7 @@ impl RunningTask { async fn run_inner(mut self) { let mut bad_validators = Vec::new(); // Try validators in order: - while let Some(validator)= self.group.pop() { + while let Some(validator) = self.group.pop() { // Send request: let resp = match self.do_request(&validator).await { Ok(resp) => resp, diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index c0792db5d722..a02bcc2a38f3 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -15,6 +15,8 @@ // along with Polkadot. If not, see . +use futures::{FutureExt, TryFutureExt}; + use sp_keystore::SyncCryptoStorePtr; use polkadot_subsystem::{ @@ -60,7 +62,7 @@ where { fn start(self, ctx: Context) -> SpawnedSubsystem { let future = self - .run(ctx, ProtocolState::new()) + .run(ctx) .map_err(|e| SubsystemError::with_origin("availability-distribution", e)) .boxed(); @@ -79,16 +81,17 @@ impl AvailabilityDistributionSubsystem { } /// Start processing work as passed on from the Overseer. - async fn run(self, mut ctx: Context, state: &mut ProtocolState) -> Result<()> + async fn run(self, mut ctx: Context) -> Result<()> where Context: SubsystemContext + Sync + Send, { + let mut state = ProtocolState::new(self.keystore.clone()); loop { let message = ctx.recv().await?; match message { FromOverseer::Signal(OverseerSignal::ActiveLeaves(update)) => { // Update the relay chain heads we are fetching our pieces for: - state.update_fetching_heads(&mut ctx, update)?; + state.update_fetching_heads(&mut ctx, update).await?; } FromOverseer::Signal(OverseerSignal::BlockFinalized(..)) => {} FromOverseer::Signal(OverseerSignal::Conclude) => { @@ -96,7 +99,7 @@ impl AvailabilityDistributionSubsystem { } FromOverseer::Communication { msg: AvailabilityDistributionMessage::AvailabilityFetchingRequest(_), - } => { + } => { // TODO: Implement issue 2306: tracing::warn!( target: LOG_TARGET, diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index f31984f62479..b09d0480afee 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -28,8 +28,9 @@ use polkadot_node_subsystem_util::{ }; use polkadot_primitives::v1::SessionInfo as GlobalSessionInfo; use polkadot_primitives::v1::{ - BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, - SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, AuthorityDiscoveryId + AuthorityDiscoveryId, BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, + ErasureChunk, Hash, HashT, SessionIndex, ValidatorId, ValidatorIndex, + PARACHAIN_KEY_TYPE_ID, GroupIndex, }; use polkadot_subsystem::{ jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, @@ -84,6 +85,16 @@ pub struct SessionInfo { 
// pub our_group: GroupIndex, } +/// Report of bad validators. +pub struct BadValidators { + /// The session index that was used. + pub session_index: SessionIndex, + /// The group the not properly responding validators are. + pub group_index: GroupIndex, + /// The indeces of the bad validators. + pub bad_validators: Vec, +} + impl SessionCache { pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { SessionCache { @@ -183,7 +194,7 @@ impl SessionCache { group .into_iter() .map(|index| { - discovery_keys.get(Into::into(index) as usize) + discovery_keys.get(index.0 as usize) .expect("There should be a discovery key for each validator of each validator group. qed.").clone() }) .collect() diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index 3668ae6310c4..76d90e3581a3 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -60,10 +60,12 @@ use std::collections::{ use std::iter::IntoIterator; use std::sync::Arc; -use futures::channel::oneshot; +use futures::channel::{mpsc, oneshot}; +use futures::StreamExt; +use itertools::{Either, Itertools}; use jaeger::JaegerSpan; -use itertools::{Either, Itertools}; +use sp_keystore::SyncCryptoStorePtr; use polkadot_node_subsystem_util::request_availability_cores_ctx; use polkadot_primitives::v1::{ @@ -78,7 +80,12 @@ use polkadot_subsystem::{ SubsystemContext, SubsystemError, }; -use super::{fetch_task::FetchTask, session_cache::SessionCache, Result, LOG_TARGET, error::recv_runtime}; +use super::{ + error::recv_runtime, + fetch_task::{FetchTask, FromFetchTask}, + session_cache::SessionCache, + Result, LOG_TARGET, +}; /// A running instance of this subsystem. pub struct ProtocolState { @@ -89,13 +96,23 @@ pub struct ProtocolState { /// /// This is usually the current one and at session boundaries also the last one. session_cache: SessionCache, + + /// Sender to be cloned for `FetchTask`s. + tx: mpsc::Sender, + + /// Receive messages from `FetchTask`. + rx: mpsc::Receiver, } impl ProtocolState { pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { + // All we do is forwarding messages, no need to make this big. + let (tx, rx) = mpsc::channel(1); ProtocolState { fetches: HashMap::new(), session_cache: SessionCache::new(keystore), + tx, + rx, } } /// Update heads that need availability distribution. @@ -115,11 +132,26 @@ impl ProtocolState { } = update; // Order important! We need to handle activated, prior to deactivated, otherwise we might // cancel still needed jobs. - self.start_requesting_chunks(ctx, activated.into_iter()).await?; + self.start_requesting_chunks(ctx, activated.into_iter()) + .await?; self.stop_requesting_chunks(deactivated.into_iter()); Ok(()) } + pub(crate) async fn advance(&mut self, ctx: &mut Context) -> Result<()> + where + Context: SubsystemContext, + { + match self.rx.next().await { + Some(FromFetchTask::Message(m)) => ctx.send_message(m).await, + Some(FromFetchTask::Concluded(Some(bad_boys))) => { + self.session_cache.report_bad(bad_boys)? + } + Some(FromFetchTask::Concluded(None)) => {} + } + Ok(()) + } + /// Start requesting chunks for newly imported heads. async fn start_requesting_chunks( &mut self, @@ -140,11 +172,8 @@ impl ProtocolState { /// /// Returns relay_parents which became irrelevant for availability fetching (are not /// referenced by any candidate anymore). 
- fn stop_requesting_chunks( - &mut self, - obsolete_leaves: impl Iterator, - ) { - let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().map(|h| h.0).collect(); + fn stop_requesting_chunks(&mut self, obsolete_leaves: impl Iterator) { + let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().collect(); self.fetches.retain(|&c_hash, task| { task.remove_leaves(obsolete_leaves); task.is_live() @@ -164,20 +193,26 @@ impl ProtocolState { leaf: Hash, cores: impl IntoIterator, ) -> Result<()> - where + where Context: SubsystemContext, { for core in cores { match self.fetches.entry(core.candidate_hash) { - Entry::Occupied(e) => + Entry::Occupied(mut e) => // Just book keeping - we are already requesting that chunk: - e.get_mut().add_leaf(leaf), + { + e.get_mut().add_leaf(leaf) + } Entry::Vacant(e) => { let session_info = self .session_cache - .fetch_session_info(ctx, core.candidate_descriptor.relay_parent).await?; + .fetch_session_info(ctx, core.candidate_descriptor.relay_parent) + .await?; if let Some(session_info) = session_info { - e.insert(FetchTask::start(ctx, leaf, core, session_info)) + e.insert( + FetchTask::start(ctx, leaf, core, session_info, self.tx.clone()) + .await?, + ); } // Not a validator, nothing to do. } From 5e77fb4ce0456b74a5d0657d742c140a42ae8dd3 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 18 Feb 2021 10:17:42 +0100 Subject: [PATCH 18/60] Make session cache proper lru cache. --- .../src/session_cache.rs | 98 ++++++++++++------- 1 file changed, 63 insertions(+), 35 deletions(-) diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index b09d0480afee..2ebc353f4fae 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -14,9 +14,10 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::rc::{Rc, Weak}; +use lru::LruCache; use rand::{seq::SliceRandom, thread_rng}; use sp_application_crypto::AppKey; @@ -29,8 +30,8 @@ use polkadot_node_subsystem_util::{ use polkadot_primitives::v1::SessionInfo as GlobalSessionInfo; use polkadot_primitives::v1::{ AuthorityDiscoveryId, BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, - ErasureChunk, Hash, HashT, SessionIndex, ValidatorId, ValidatorIndex, - PARACHAIN_KEY_TYPE_ID, GroupIndex, + ErasureChunk, GroupIndex, Hash, HashT, SessionIndex, ValidatorId, ValidatorIndex, + PARACHAIN_KEY_TYPE_ID, }; use polkadot_subsystem::{ jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, @@ -47,20 +48,18 @@ use super::{ /// It should be ensured that a cached session stays live in the cache as long as we might need it. /// A warning will be logged, if an already dead entry gets fetched. pub struct SessionCache { - /// Maintain caches for session information for currently relay parents of interest. + /// Get the session index for a given relay parent. /// - /// Fast path - if we have an entry here, no query to the runtime is necessary at all. - by_relay_parent: HashMap>, + /// We query this up to a 100 times per block, so caching it here without roundtrips over the + /// overseer seems sensible. + session_index_cache: LruCache, /// Look up cached sessions by SessionIndex. 
/// - /// Slower path - we still have to look up the `SessionIndex` in the runtime, but still might have - /// the session ready already. - /// /// Note: Performance of fetching is really secondary here, but we need to ensure we are going /// to get any existing cache entry, before fetching new information, as we should not mess up - /// the order of validators. - by_session_index: HashMap>, + /// the order of validators. (We want live TCP connections wherever possible.) + session_info_cache: LruCache, /// Key store for determining whether we are a validator and what `ValidatorIndex` we have. keystore: SyncCryptoStorePtr, @@ -79,6 +78,7 @@ pub struct SessionInfo { /// Information about ourself: pub our_index: ValidatorIndex, + //// Remember to which group we blong, so we won't start fetching chunks for candidates we //// backed our selves. // TODO: Implement this: @@ -98,8 +98,10 @@ pub struct BadValidators { impl SessionCache { pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { SessionCache { - by_relay_parent: HashMap::new(), - by_session_index: HashMap::new(), + // 5 relatively conservative, 1 to 2 should suffice: + session_index_cache: LruCache::new(5), + // We need to cache the current and the last session the most: + session_info_cache: LruCache::new(2), keystore, } } @@ -112,37 +114,64 @@ impl SessionCache { &mut self, ctx: &mut Context, parent: Hash, - ) -> Result>> + ) -> Result> where Context: SubsystemContext, { - if let Some(info) = self.get_by_relay_parent(parent) { - return Ok(Some(info)); - } - let session_index = - recv_runtime(request_session_index_for_child_ctx(parent, ctx).await).await?; - if let Some(info) = self.get_by_session_index(session_index) { - self.by_relay_parent.insert(parent, Rc::downgrade(&info)); - return Ok(Some(info)); - } + let session_index = match self.session_index_cache.get(parent) { + Some(index) => index, + None => { + let index = + recv_runtime(request_session_index_for_child_ctx(parent, ctx).await) + .await?; + self.session_index_cache.put(parent, index); + index + } + }; - // About to fetch new stuff, time to get rid of dead bodies: We keep relay_parent to - // session info matches way longer than necessary (for an entire session), but the overhead - // should be low enough to not matter. - self.bury_dead(); + if let Some(info) = self.session_info_cache.get(session_index) { + return Ok(Some(info.clone())); + } if let Some(info) = self .query_info_from_runtime(ctx, parent, session_index) .await? { - self.by_relay_parent.insert(parent, Rc::downgrade(&info)); - self.by_session_index - .insert(session_index, Rc::downgrade(&info)); + self.session_info_cache.put(session_index, info.clone()); return Ok(Some(info)); } Ok(None) } + pub async with_session_info( + &mut self, + ctx: &mut Context, + parent: Hash, + with_info: F, + ) -> R + where + Context: SubsystemContext, + F: Fn(info: &SessionInfo) -> R + { + } + + /// Make sure we try unresponsive or misbehaving validators last. 
+ pub fn report_bad(&mut self, report: BadValidators) -> Result<()> { + let session = self + .session_info_cache + .get_mut(&report.session_index) + .ok_or(Error::ReportBadValidators("Session is not cached."))?; + let group = session + .validator_groups + .get_mut(report.group_index.0 as usize) + .ok_or(Error::ReportBadValidators("Validator group not found"))?; + let bad_set = report.bad_validators.iter().collect::>(); + // Put the bad boys last: + group.retain(|v| !bad_set.contains(v)); + group.append(report.bad_validators); + Ok(()) + } + /// Get session info for a particular relay parent. /// /// Returns: None, if no entry for that relay parent exists in the cache (or it was dead @@ -154,8 +183,7 @@ impl SessionCache { /// Get session info for a given `SessionIndex`. fn get_by_session_index(&self, session_index: SessionIndex) -> Option> { - let weak_ref = self.by_session_index.get(&session_index)?; - upgrade_report_dead(weak_ref) + self.by_session_index.get(&session_index) } /// Query needed information from runtime. @@ -168,7 +196,7 @@ impl SessionCache { ctx: &mut Context, parent: Hash, session_index: SessionIndex, - ) -> Result>> + ) -> Result> where Context: SubsystemContext, { @@ -201,11 +229,11 @@ impl SessionCache { }) .collect(); - let info = Rc::new(SessionInfo { + let info = SessionInfo { validator_groups, our_index, session_index, - }); + }; return Ok(Some(info)); } return Ok(None); From 72704ee0e2341416308edbbc450d8cd77aee887f Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 18 Feb 2021 12:50:43 +0100 Subject: [PATCH 19/60] Use proper lru cache. --- .../availability-distribution/Cargo.toml | 1 + .../availability-distribution/src/error.rs | 4 + .../src/fetch_task.rs | 72 ++++++++++------ .../src/session_cache.rs | 85 +++++++------------ .../availability-distribution/src/state.rs | 36 +++----- 5 files changed, 94 insertions(+), 104 deletions(-) diff --git a/node/network/availability-distribution/Cargo.toml b/node/network/availability-distribution/Cargo.toml index 03cd654d6f0a..7bbd73bdbfd6 100644 --- a/node/network/availability-distribution/Cargo.toml +++ b/node/network/availability-distribution/Cargo.toml @@ -21,6 +21,7 @@ sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "maste thiserror = "1.0.23" itertools = "0.10.0" rand = "0.8.3" +lru = "0.6.5" [dev-dependencies] polkadot-subsystem-testhelpers = { package = "polkadot-node-subsystem-test-helpers", path = "../../subsystem-test-helpers" } diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index c507d5835604..c6ccba4ab61c 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -59,6 +59,10 @@ pub enum Error { /// Spawning a running task failed. #[error("Spawning subsystem task failed")] SpawnTask(#[source] SubsystemError), + + /// Reporting bad validators failed. 
+ #[error("Reporting bad validators failed")] + ReportBadValidators(&'static str), } pub type Result = std::result::Result; diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index a73edc50a00f..d12e1c76c984 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -21,7 +21,7 @@ use std::rc::Rc; use futures::channel::mpsc; use futures::channel::oneshot; use futures::future::select; -use futures::{SinkExt, FutureExt}; +use futures::{FutureExt, SinkExt}; use polkadot_erasure_coding::branch_hash; use polkadot_node_network_protocol::request_response::{ @@ -43,7 +43,20 @@ use polkadot_subsystem::{ Subsystem, SubsystemContext, SubsystemError, SubsystemResult, }; -use super::{session_cache::{SessionInfo, BadValidators}, LOG_TARGET, error::{Error, Result}}; +use super::{ + error::{Error, Result}, + session_cache::{BadValidators, SessionInfo}, + LOG_TARGET, +}; + +/// Configuration for a `FetchTask` +/// +/// This exists to separate preparation of a `FetchTask` from actual starting it, which is +/// beneficial as this allows as for taking session info by reference. +pub struct FetchTaskConfig { + prepared_running: RunningTask, + live_in: HashSet, +} pub struct FetchTask { /// For what relay parents this task is relevant. @@ -56,9 +69,6 @@ pub struct FetchTask { /// We keep the task around in state `Fetched` until `live_in` becomes empty, to make /// sure we won't re-fetch an already fetched candidate. state: FetchedState, - - /// Session information. - session: Rc, } /// State of a particular candidate chunk fetching process. @@ -109,20 +119,17 @@ struct RunningTask { sender: mpsc::Sender, } -impl FetchTask { - /// Start fetching a chunk. - pub async fn start( - ctx: &mut Context, +impl FetchTaskConfig { + /// Create a new configuration for a [`FetchTask`]. + /// + /// The result of this function can be passed into [`FetchTask::start`]. + pub fn new( leaf: Hash, - core: OccupiedCore, - session_info: Rc, + core: &OccupiedCore, sender: mpsc::Sender, - ) -> Result - where - Context: SubsystemContext, - { - let (handle, kill) = oneshot::channel(); - let running = RunningTask { + session_info: &SessionInfo, + ) -> Self { + let prepared_running = RunningTask { session_index: session_info.session_index, group_index: core.group_responsible, group: session_info.validator_groups.get(core.group_responsible.0 as usize).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), @@ -134,13 +141,30 @@ impl FetchTask { relay_parent: core.candidate_descriptor.relay_parent, sender, }; - ctx.spawn("chunk-fetcher", running.run(kill).boxed()) + FetchTaskConfig { + live_in: vec![leaf].into_iter().collect(), + prepared_running, + } + } +} + +impl FetchTask { + /// Start fetching a chunk. + pub async fn start(config: FetchTaskConfig, ctx: &mut Context) -> Result + where + Context: SubsystemContext, + { + let FetchTaskConfig { + prepared_running, + live_in, + } = config; + let (handle, kill) = oneshot::channel(); + ctx.spawn("chunk-fetcher", prepared_running.run(kill).boxed()) .await .map_err(|e| Error::SpawnTask(e))?; Ok(FetchTask { - live_in: vec![leaf].into_iter().collect(), + live_in, state: FetchedState::Started(handle), - session: session_info, }) } @@ -151,8 +175,8 @@ impl FetchTask { /// Remove leaves and cancel the task, if it was the last one and the task has still been /// fetching. 
- pub fn remove_leaves(&mut self, leaves: HashSet) { - self.live_in.difference(&leaves); + pub fn remove_leaves(&mut self, leaves: &HashSet) { + self.live_in.difference(leaves); if self.live_in.is_empty() { self.state = FetchedState::Canceled } @@ -162,7 +186,7 @@ impl FetchTask { /// /// That is, it is either canceled, succeeded or failed. pub fn is_finished(&self) -> bool { - match self.state { + match &self.state { FetchedState::Canceled => true, FetchedState::Started(sender) => sender.is_canceled(), } @@ -303,7 +327,7 @@ impl RunningTask { /// Store given chunk and log any error. async fn store_chunk(&mut self, chunk: ErasureChunk) { let (tx, rx) = oneshot::channel(); - self.sender + let r = self.sender .send(FromFetchTask::Message(AllMessages::AvailabilityStore( AvailabilityStoreMessage::StoreChunk { candidate_hash: self.request.candidate_hash, diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 2ebc353f4fae..f403df2b0c09 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -66,6 +66,7 @@ pub struct SessionCache { } /// Localized session information, tailored for the needs of availability distribution. +#[derive(Clone)] pub struct SessionInfo { /// The index of this session. pub session_index: SessionIndex, @@ -118,8 +119,27 @@ impl SessionCache { where Context: SubsystemContext, { - let session_index = match self.session_index_cache.get(parent) { - Some(index) => index, + self.with_session_info(ctx, parent, Clone::clone).await + } + + /// Tries to retrieve `SessionInfo` and calls `with_info` if successful. + /// + /// If this node is not a validator, the function will return `None`. + /// + /// Use this function over `fetch_session_info` if all you need is a reference to + /// `SessionInfo`, as it avoids an expensive clone. + pub async fn with_session_info( + &mut self, + ctx: &mut Context, + parent: Hash, + with_info: F, + ) -> Result> + where + Context: SubsystemContext, + F: FnOnce(&SessionInfo) -> R + { + let session_index = match self.session_index_cache.get(&parent) { + Some(index) => *index, None => { let index = recv_runtime(request_session_index_for_child_ctx(parent, ctx).await) @@ -129,34 +149,23 @@ impl SessionCache { } }; - if let Some(info) = self.session_info_cache.get(session_index) { - return Ok(Some(info.clone())); + if let Some(info) = self.session_info_cache.get(&session_index) { + return Ok(Some(with_info(info))) } if let Some(info) = self .query_info_from_runtime(ctx, parent, session_index) .await? { - self.session_info_cache.put(session_index, info.clone()); - return Ok(Some(info)); + let r = with_info(&info); + self.session_info_cache.put(session_index, info); + return Ok(Some(r)); } Ok(None) } - pub async with_session_info( - &mut self, - ctx: &mut Context, - parent: Hash, - with_info: F, - ) -> R - where - Context: SubsystemContext, - F: Fn(info: &SessionInfo) -> R - { - } - /// Make sure we try unresponsive or misbehaving validators last. 
- pub fn report_bad(&mut self, report: BadValidators) -> Result<()> { + pub fn report_bad(&mut self, mut report: BadValidators) -> Result<()> { let session = self .session_info_cache .get_mut(&report.session_index) @@ -168,24 +177,10 @@ impl SessionCache { let bad_set = report.bad_validators.iter().collect::>(); // Put the bad boys last: group.retain(|v| !bad_set.contains(v)); - group.append(report.bad_validators); + group.append(&mut report.bad_validators); Ok(()) } - /// Get session info for a particular relay parent. - /// - /// Returns: None, if no entry for that relay parent exists in the cache (or it was dead - /// already - which should not happen.) - fn get_by_relay_parent(&self, parent: Hash) -> Option> { - let weak_ref = self.by_relay_parent.get(&parent)?; - upgrade_report_dead(weak_ref) - } - - /// Get session info for a given `SessionIndex`. - fn get_by_session_index(&self, session_index: SessionIndex) -> Option> { - self.by_session_index.get(&session_index) - } - /// Query needed information from runtime. /// /// We need to pass in the relay parent for our call to `request_session_info_ctx`. We should @@ -252,26 +247,4 @@ impl SessionCache { } None } - - /// Get rid of the dead bodies from time to time. - fn bury_dead(&mut self) { - self.by_session_index - .retain(|_, info| info.upgrade().is_some()); - self.by_relay_parent - .retain(|_, info| info.upgrade().is_some()); - } -} - -/// Upgrade a weak SessionInfo reference. -/// -/// Warn if it was dead already, as this should not happen. Cache should stay valid at least as -/// long as we need it. -fn upgrade_report_dead(info: &Weak) -> Option> { - match info.upgrade() { - Some(info) => Some(info), - None => { - tracing::warn!(LOG_TARGET, "A no longer cached session got requested, this should not happen in normal operation."); - None - } - } } diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index 76d90e3581a3..10e6ddfef473 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -82,7 +82,7 @@ use polkadot_subsystem::{ use super::{ error::recv_runtime, - fetch_task::{FetchTask, FromFetchTask}, + fetch_task::{FetchTask, FromFetchTask, FetchTaskConfig}, session_cache::SessionCache, Result, LOG_TARGET, }; @@ -138,20 +138,6 @@ impl ProtocolState { Ok(()) } - pub(crate) async fn advance(&mut self, ctx: &mut Context) -> Result<()> - where - Context: SubsystemContext, - { - match self.rx.next().await { - Some(FromFetchTask::Message(m)) => ctx.send_message(m).await, - Some(FromFetchTask::Concluded(Some(bad_boys))) => { - self.session_cache.report_bad(bad_boys)? - } - Some(FromFetchTask::Concluded(None)) => {} - } - Ok(()) - } - /// Start requesting chunks for newly imported heads. 
async fn start_requesting_chunks( &mut self, @@ -175,7 +161,7 @@ impl ProtocolState { fn stop_requesting_chunks(&mut self, obsolete_leaves: impl Iterator) { let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().collect(); self.fetches.retain(|&c_hash, task| { - task.remove_leaves(obsolete_leaves); + task.remove_leaves(&obsolete_leaves); task.is_live() }) } @@ -201,18 +187,20 @@ impl ProtocolState { Entry::Occupied(mut e) => // Just book keeping - we are already requesting that chunk: { - e.get_mut().add_leaf(leaf) + e.get_mut().add_leaf(leaf); } Entry::Vacant(e) => { - let session_info = self + let tx = self.tx.clone(); + let task_cfg = self .session_cache - .fetch_session_info(ctx, core.candidate_descriptor.relay_parent) + .with_session_info( + ctx, + core.candidate_descriptor.relay_parent, + |info| FetchTaskConfig::new(leaf, &core, tx, info), + ) .await?; - if let Some(session_info) = session_info { - e.insert( - FetchTask::start(ctx, leaf, core, session_info, self.tx.clone()) - .await?, - ); + if let Some(task_cfg) = task_cfg { + e.insert(FetchTask::start(task_cfg, ctx).await?); } // Not a validator, nothing to do. } From 60a2faf94574f0a119742d2ef78d5746b3ea9a90 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 18 Feb 2021 16:43:48 +0100 Subject: [PATCH 20/60] Requester finished. --- .../availability-distribution/src/error.rs | 4 ++ .../src/fetch_task.rs | 9 ++-- .../availability-distribution/src/lib.rs | 45 ++++++++++++++----- .../availability-distribution/src/state.rs | 39 +++++++++++++--- 4 files changed, 78 insertions(+), 19 deletions(-) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index c6ccba4ab61c..39a743c7ba24 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -63,6 +63,10 @@ pub enum Error { /// Reporting bad validators failed. #[error("Reporting bad validators failed")] ReportBadValidators(&'static str), + + /// Requester stream exhausted. + #[error("Erasure chunk requester stream exhausted")] + RequesterExhausted, } pub type Result = std::result::Result; diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index d12e1c76c984..c2ba2c077010 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -255,7 +255,7 @@ impl RunningTask { self.store_chunk(chunk).await; break; } - self.conclude(bad_validators); + self.conclude(bad_validators).await; } /// Do request and return response, if successful. @@ -280,6 +280,7 @@ impl RunningTask { tracing::warn!( target: LOG_TARGET, origin= ?validator, + err= ?err, "Peer sent us invalid erasure chunk data" ); Err(TaskError::PeerError) @@ -288,11 +289,12 @@ impl RunningTask { tracing::warn!( target: LOG_TARGET, origin= ?validator, + err= ?err, "Some network error occurred when fetching erasure chunk" ); Err(TaskError::PeerError) } - Err(RequestError::Canceled(err)) => { + Err(RequestError::Canceled(oneshot::Canceled)) => { tracing::warn!(target: LOG_TARGET, origin= ?validator, "Erasure chunk request got canceled"); @@ -327,7 +329,8 @@ impl RunningTask { /// Store given chunk and log any error. 
async fn store_chunk(&mut self, chunk: ErasureChunk) { let (tx, rx) = oneshot::channel(); - let r = self.sender + let r = self + .sender .send(FromFetchTask::Message(AllMessages::AvailabilityStore( AvailabilityStoreMessage::StoreChunk { candidate_hash: self.request.candidate_hash, diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index a02bcc2a38f3..09c34742150c 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -14,14 +14,17 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . - -use futures::{FutureExt, TryFutureExt}; +use futures::{future::Either, FutureExt, StreamExt, TryFutureExt}; use sp_keystore::SyncCryptoStorePtr; use polkadot_subsystem::{ - jaeger, errors::{ChainApiError, RuntimeApiError}, PerLeafSpan, - ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, messages::AvailabilityDistributionMessage + errors::{ChainApiError, RuntimeApiError}, + jaeger, + messages::AllMessages, + messages::AvailabilityDistributionMessage, + ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, Subsystem, + SubsystemContext, SubsystemError, }; /// Error and [`Result`] type for this subsystem. @@ -42,8 +45,6 @@ mod session_cache; const LOG_TARGET: &'static str = "availability_distribution"; - - /// Availability Distribution metrics. /// TODO: Dummy for now. type Metrics = (); @@ -73,7 +74,6 @@ where } } - impl AvailabilityDistributionSubsystem { /// Create a new instance of the availability distribution. pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { @@ -85,13 +85,32 @@ impl AvailabilityDistributionSubsystem { where Context: SubsystemContext + Sync + Send, { - let mut state = ProtocolState::new(self.keystore.clone()); + let mut state = ProtocolState::new(self.keystore.clone()).fuse(); loop { - let message = ctx.recv().await?; + let action = { + let mut subsystem_next = ctx.recv().fuse(); + futures::select! { + subsystem_msg = subsystem_next => Either::Left(subsystem_msg), + from_task = state.next() => Either::Right(from_task), + } + }; + let message = match action { + Either::Left(subsystem_msg) => { + subsystem_msg.map_err(|e| Error::IncomingMessageChannel(e))? + } + Either::Right(from_task) => { + let from_task = from_task.ok_or(Error::RequesterExhausted)??; + ctx.send_message(from_task).await; + continue; + } + }; match message { FromOverseer::Signal(OverseerSignal::ActiveLeaves(update)) => { // Update the relay chain heads we are fetching our pieces for: - state.update_fetching_heads(&mut ctx, update).await?; + state + .get_mut() + .update_fetching_heads(&mut ctx, update) + .await?; } FromOverseer::Signal(OverseerSignal::BlockFinalized(..)) => {} FromOverseer::Signal(OverseerSignal::Conclude) => { @@ -106,8 +125,12 @@ impl AvailabilityDistributionSubsystem { "To be implemented, see: https://github.com/paritytech/polkadot/issues/2306!", ); } + FromOverseer::Communication { + msg: AvailabilityDistributionMessage::NetworkBridgeUpdateV1(_), + } => { + // There are currently no bridge updates we are interested in. 
+ } } } } } - diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index 10e6ddfef473..5eeb92808ccd 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -58,11 +58,14 @@ use std::collections::{ hash_set::HashSet, }; use std::iter::IntoIterator; +use std::pin::Pin; use std::sync::Arc; -use futures::channel::{mpsc, oneshot}; -use futures::StreamExt; -use itertools::{Either, Itertools}; +use futures::{ + channel::{mpsc, oneshot}, + task::{Context, Poll}, + Stream, StreamExt, +}; use jaeger::JaegerSpan; use sp_keystore::SyncCryptoStorePtr; @@ -75,14 +78,14 @@ use polkadot_primitives::v1::{ use polkadot_subsystem::{ errors::{ChainApiError, RuntimeApiError}, jaeger, - messages::AvailabilityDistributionMessage, + messages::{AllMessages, AvailabilityDistributionMessage}, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, }; use super::{ error::recv_runtime, - fetch_task::{FetchTask, FromFetchTask, FetchTaskConfig}, + fetch_task::{FetchTask, FetchTaskConfig, FromFetchTask}, session_cache::SessionCache, Result, LOG_TARGET, }; @@ -210,6 +213,32 @@ impl ProtocolState { } } +impl Stream for ProtocolState { + type Item = Result; + + fn poll_next( + mut self: Pin<&mut Self>, + ctx: &mut Context, + ) -> Poll>> { + loop { + match Pin::new(&mut self.rx).poll_next(ctx) { + Poll::Ready(Some(FromFetchTask::Message(m))) => { + return Poll::Ready(Some(Ok(m))) + } + Poll::Ready(Some(FromFetchTask::Concluded(Some(bad_boys)))) => { + match self.session_cache.report_bad(bad_boys) { + Err(err) => return Poll::Ready(Some(Err(err))), + Ok(()) => continue, + } + } + Poll::Ready(Some(FromFetchTask::Concluded(None))) => continue, + Poll::Ready(None) => return Poll::Ready(None), + Poll::Pending => return Poll::Pending, + } + } + } +} + ///// Query all hashes and descriptors of candidates pending availability at a particular block. // #[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] async fn query_occupied_cores( From 452b55f2cd7582f10ebaa25bf4dbd7818dbc0dae Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 18 Feb 2021 18:42:52 +0100 Subject: [PATCH 21/60] ProtocolState -> Requester Also make sure to not fetch our own chunk. 
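
The `Stream` implementation shown above is the heart of this refactor: the requester owns an `mpsc::Receiver` fed by its fetch tasks, forwards `Message` items to the caller and consumes `Concluded` items internally. A minimal sketch of that pattern, with simplified stand-in types rather than the actual subsystem messages:

    use std::pin::Pin;
    use std::task::{Context, Poll};

    use futures::channel::mpsc;
    use futures::Stream;

    // Stand-ins for `AllMessages` and `Option<BadValidators>`.
    enum FromTask {
        Message(String),
        Concluded(Option<u32>),
    }

    struct Requester {
        rx: mpsc::Receiver<FromTask>,
        reported: Vec<u32>, // stand-in for session-cache book keeping
    }

    impl Stream for Requester {
        type Item = String;

        fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
            loop {
                match Pin::new(&mut self.rx).poll_next(cx) {
                    // Forward messages from fetch tasks to the subsystem main loop.
                    Poll::Ready(Some(FromTask::Message(m))) => return Poll::Ready(Some(m)),
                    // Book keeping stays internal; keep polling for the next item.
                    Poll::Ready(Some(FromTask::Concluded(Some(bad)))) => {
                        self.reported.push(bad);
                        continue;
                    }
                    Poll::Ready(Some(FromTask::Concluded(None))) => continue,
                    Poll::Ready(None) => return Poll::Ready(None),
                    Poll::Pending => return Poll::Pending,
                }
            }
        }
    }

Fusing this stream and selecting it against `ctx.recv()`, as the `run` loop above does, gives the subsystem a single loop that serves both overseer signals and fetch-task output.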
--- Cargo.lock | 8 +- .../availability-distribution/Cargo.toml | 1 - .../availability-distribution/src/lib.rs | 12 +-- .../src/{state.rs => requester.rs} | 77 ++++++------------- .../src/{ => requester}/fetch_task.rs | 46 +++++++---- .../src/session_cache.rs | 32 ++++++-- 6 files changed, 91 insertions(+), 85 deletions(-) rename node/network/availability-distribution/src/{state.rs => requester.rs} (64%) rename node/network/availability-distribution/src/{ => requester}/fetch_task.rs (93%) diff --git a/Cargo.lock b/Cargo.lock index 958f491b5adf..84d0aedb5c27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3458,9 +3458,9 @@ dependencies = [ [[package]] name = "lru" -version = "0.6.3" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3aae342b73d57ad0b8b364bd12584819f2c1fe9114285dfcf8b0722607671635" +checksum = "1f374d42cdfc1d7dbf3d3dec28afab2eb97ffbf43a3234d795b5986dbf4b90ba" dependencies = [ "hashbrown", ] @@ -5067,15 +5067,19 @@ version = "0.1.0" dependencies = [ "assert_matches", "futures 0.3.12", + "lru", "maplit", "parity-scale-codec", "polkadot-erasure-coding", + "polkadot-node-core-runtime-api", "polkadot-node-network-protocol", "polkadot-node-subsystem", "polkadot-node-subsystem-test-helpers", "polkadot-node-subsystem-util", "polkadot-primitives", + "rand 0.8.3", "sc-keystore", + "sc-network", "sp-application-crypto", "sp-core", "sp-keyring", diff --git a/node/network/availability-distribution/Cargo.toml b/node/network/availability-distribution/Cargo.toml index 7bbd73bdbfd6..5483d87deff7 100644 --- a/node/network/availability-distribution/Cargo.toml +++ b/node/network/availability-distribution/Cargo.toml @@ -19,7 +19,6 @@ sp-application-crypto = { git = "https://github.com/paritytech/substrate", branc sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", features = ["std"] } sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } thiserror = "1.0.23" -itertools = "0.10.0" rand = "0.8.3" lru = "0.6.5" diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 09c34742150c..46dcd74f7498 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -32,13 +32,9 @@ mod error; pub use error::Error; use error::Result; -/// The actual implementation of running availability distribution. -mod state; -/// State of a running availability-distribution subsystem. -use state::ProtocolState; - -/// A task fetching a particular chunk. -mod fetch_task; +/// `Requester` taking care of requesting chunks for candidates pending availability. +mod requester; +use requester::Requester; /// Cache for session information. 
mod session_cache; @@ -85,7 +81,7 @@ impl AvailabilityDistributionSubsystem { where Context: SubsystemContext + Sync + Send, { - let mut state = ProtocolState::new(self.keystore.clone()).fuse(); + let mut state = Requester::new(self.keystore.clone()).fuse(); loop { let action = { let mut subsystem_next = ctx.recv().fuse(); diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/requester.rs similarity index 64% rename from node/network/availability-distribution/src/state.rs rename to node/network/availability-distribution/src/requester.rs index 5eeb92808ccd..58237f8deb9b 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/requester.rs @@ -14,44 +14,8 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -//! `ProtocolState` representing a running availability distribution subsystem. -//! -//! We keep track of [`FetchTask`]s, which get created on [`ActiveLeavesUpdate`]s for each occupied -//! core in the leaves, if we have not yet created it before. We keep track for which -//! relay parents a `FetchTask` is considered live (corresponding slot is occupied with the -//! candidate fetched). Once there is no relay parent left for which that task is considered live, -//! it gets removed. -//! -//! We keep that task around as long as its corresponding candidate is considered pending -//! availability, even if we fetched our chunk already. This is so we won't fetch our piece again, -//! just because the candidate is still pending availability in the next block. -//! -//! We are also dependent on session information. We need to know which validators are in a -//! particular validator group, backing our candidate, so we can request our erasure chunk from -//! them. -//! -//! We want to randomize the list of validators in each group, so we get a -//! random order of validators to try to get the chunk from. This is to ensure load balancing, each -//! requesting validator should have a different order, thus trying different validators. -//! -//! But We would like to keep that randomized order around for an entire session, so our particular -//! validator will always request from the same validators, thus making sure it will find an open -//! network connection on each request. -//! -//! (TODO: What to do on session boundaries? Initial delay acceptable? Connect with some fake -//! request to future validators? Use a peer set after all and connect that to the future session?) -//! -//! So we need to keep some customized session info around, which seems to be a good idea for -//! performance reasons anyway. That's where `SessionCache` comes into play. It is used to keep -//! session information around as long as we need it. But how long do we need it? How do we manage -//! that cache? We can't rely on `ActiveLeavesUpdate`s heads alone, as we might get occupied slots -//! for heads we never got an `ActiveLeavesUpdate` from, therefore we don't populate the session -//! cache with sessions our leaves correspond to, but directly with the sessions of the relay -//! parents of our `CandidateDescriptor`s. So, its clear how to populate the cache, but when can we -//! get rid of cached session information? If for sure is safe to do when there is no -//! candidate/FetchTask around anymore which references it. Thus the cache simply consists of -//! `Weak` pointers to the actual session infos and the `FetchTask`s keep `Rc`s, therefore we know -//! 
exactly when we can get rid of a cache entry by means of the Weak pointer evaluating to `None`. +//! Requester takes care of requesting erasure chunks for candidates that are pending +//! availability. use std::collections::{ hash_map::{Entry, HashMap}, @@ -83,21 +47,26 @@ use polkadot_subsystem::{ SubsystemContext, SubsystemError, }; -use super::{ - error::recv_runtime, - fetch_task::{FetchTask, FetchTaskConfig, FromFetchTask}, - session_cache::SessionCache, - Result, LOG_TARGET, -}; +use super::{error::recv_runtime, session_cache::SessionCache, Result, LOG_TARGET}; + +/// A task fetching a particular chunk. +mod fetch_task; +use fetch_task::{FetchTask, FetchTaskConfig, FromFetchTask}; -/// A running instance of this subsystem. -pub struct ProtocolState { +/// Requester takes care of requesting erasure chunks from backing groups and stores them in the +/// av store. +/// +/// It implements a stream that needs to be advanced for it making progress. +pub struct Requester { /// Candidates we need to fetch our chunk for. + /// + /// We keep those around as long as a candidate is pending availability on some leaf, so we + /// won't fetch chunks multiple times. fetches: HashMap, /// Localized information about sessions we are currently interested in. /// - /// This is usually the current one and at session boundaries also the last one. + /// This is the current one and the last one. session_cache: SessionCache, /// Sender to be cloned for `FetchTask`s. @@ -107,11 +76,15 @@ pub struct ProtocolState { rx: mpsc::Receiver, } -impl ProtocolState { +impl Requester { + /// Create a new `Requester`. + /// + /// You must feed it with `ActiveLeavesUpdate` via `update_fetching_heads` and make it progress + /// by advancing the stream. pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { // All we do is forwarding messages, no need to make this big. let (tx, rx) = mpsc::channel(1); - ProtocolState { + Requester { fetches: HashMap::new(), session_cache: SessionCache::new(keystore), tx, @@ -120,7 +93,7 @@ impl ProtocolState { } /// Update heads that need availability distribution. /// - /// For all active heads we will be fetching our chunk for availabilty distribution. + /// For all active heads we will be fetching our chunks for availabilty distribution. pub(crate) async fn update_fetching_heads( &mut self, ctx: &mut Context, @@ -159,8 +132,6 @@ impl ProtocolState { /// Stop requesting chunks for obsolete heads. /// - /// Returns relay_parents which became irrelevant for availability fetching (are not - /// referenced by any candidate anymore). 
fn stop_requesting_chunks(&mut self, obsolete_leaves: impl Iterator) { let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().collect(); self.fetches.retain(|&c_hash, task| { @@ -213,7 +184,7 @@ impl ProtocolState { } } -impl Stream for ProtocolState { +impl Stream for Requester { type Item = Result; fn poll_next( diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs similarity index 93% rename from node/network/availability-distribution/src/fetch_task.rs rename to node/network/availability-distribution/src/requester/fetch_task.rs index c2ba2c077010..6626316c455f 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -43,7 +43,7 @@ use polkadot_subsystem::{ Subsystem, SubsystemContext, SubsystemError, SubsystemResult, }; -use super::{ +use crate::{ error::{Error, Result}, session_cache::{BadValidators, SessionInfo}, LOG_TARGET, @@ -54,7 +54,7 @@ use super::{ /// This exists to separate preparation of a `FetchTask` from actual starting it, which is /// beneficial as this allows as for taking session info by reference. pub struct FetchTaskConfig { - prepared_running: RunningTask, + prepared_running: Option, live_in: HashSet, } @@ -129,7 +129,17 @@ impl FetchTaskConfig { sender: mpsc::Sender, session_info: &SessionInfo, ) -> Self { - let prepared_running = RunningTask { + let live_in = vec![leaf].into_iter().collect(); + + // Don't run tasks for our backing group: + if session_info.our_group == core.group_responsible { + return FetchTaskConfig { + live_in, + prepared_running: None, + }; + } + + let prepared_running = RunningTask { session_index: session_info.session_index, group_index: core.group_responsible, group: session_info.validator_groups.get(core.group_responsible.0 as usize).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), @@ -142,8 +152,8 @@ impl FetchTaskConfig { sender, }; FetchTaskConfig { - live_in: vec![leaf].into_iter().collect(), - prepared_running, + live_in, + prepared_running: Some(prepared_running), } } } @@ -158,14 +168,24 @@ impl FetchTask { prepared_running, live_in, } = config; - let (handle, kill) = oneshot::channel(); - ctx.spawn("chunk-fetcher", prepared_running.run(kill).boxed()) - .await - .map_err(|e| Error::SpawnTask(e))?; - Ok(FetchTask { - live_in, - state: FetchedState::Started(handle), - }) + + if let Some(running) = prepared_running { + let (handle, kill) = oneshot::channel(); + + ctx.spawn("chunk-fetcher", running.run(kill).boxed()) + .await + .map_err(|e| Error::SpawnTask(e))?; + + Ok(FetchTask { + live_in, + state: FetchedState::Started(handle), + }) + } else { + Ok(FetchTask { + live_in, + state: FetchedState::Canceled, + }) + } } /// Add the given leaf to the relay parents which are making this task relevant. diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index f403df2b0c09..fd4af299a5de 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -80,10 +80,9 @@ pub struct SessionInfo { /// Information about ourself: pub our_index: ValidatorIndex, - //// Remember to which group we blong, so we won't start fetching chunks for candidates we - //// backed our selves. 
- // TODO: Implement this: - // pub our_group: GroupIndex, + /// Remember to which group we belong, so we won't start fetching chunks for candidates those + /// candidates (We should have them via PoV distribution). + pub our_group: GroupIndex, } /// Report of bad validators. @@ -133,10 +132,10 @@ impl SessionCache { ctx: &mut Context, parent: Hash, with_info: F, - ) -> Result> - where + ) -> Result> + where Context: SubsystemContext, - F: FnOnce(&SessionInfo) -> R + F: FnOnce(&SessionInfo) -> R, { let session_index = match self.session_index_cache.get(&parent) { Some(index) => *index, @@ -150,7 +149,7 @@ impl SessionCache { }; if let Some(info) = self.session_info_cache.get(&session_index) { - return Ok(Some(with_info(info))) + return Ok(Some(with_info(info))); } if let Some(info) = self @@ -205,6 +204,22 @@ impl SessionCache { .ok_or(Error::NoSuchSession(session_index))?; if let Some(our_index) = self.get_our_index(validators).await { + // Get our group index: + let our_group = validator_groups + .iter() + .enumerate() + .find_map(|(i, g)| { + g.iter().find_map(|v| { + if *v == our_index { + Some(GroupIndex(i as u32)) + } else { + None + } + }) + }) + // TODO: Make sure this is correct and should be enforced: + .expect("Every validator should be in a validator group. qed."); + // Shuffle validators in groups: let mut rng = thread_rng(); for g in validator_groups.iter_mut() { @@ -228,6 +243,7 @@ impl SessionCache { validator_groups, our_index, session_index, + our_group, }; return Ok(Some(info)); } From 2b9b983126d13d2e15eec1e2da558680bf06c6a9 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 18 Feb 2021 22:23:11 +0100 Subject: [PATCH 22/60] Cleanup + fixes. --- .../availability-distribution/src/error.rs | 5 +---- .../availability-distribution/src/lib.rs | 8 ++------ .../src/requester.rs | 20 ++++++------------- .../src/requester/fetch_task.rs | 19 +++++++----------- .../src/session_cache.rs | 14 ++++--------- .../protocol/src/request_response/v1.rs | 2 ++ runtime/parachains/src/inclusion.rs | 6 +++--- 7 files changed, 25 insertions(+), 49 deletions(-) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index 39a743c7ba24..d442f7e00686 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -21,10 +21,7 @@ use futures::channel::oneshot; use polkadot_node_subsystem_util::Error as UtilError; use polkadot_primitives::v1::SessionIndex; -use polkadot_subsystem::{ - errors::{ChainApiError, RuntimeApiError}, - SubsystemError, -}; +use polkadot_subsystem::{errors::RuntimeApiError, SubsystemError}; #[derive(Debug, Error)] pub enum Error { diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 46dcd74f7498..c0a156a53134 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -19,12 +19,8 @@ use futures::{future::Either, FutureExt, StreamExt, TryFutureExt}; use sp_keystore::SyncCryptoStorePtr; use polkadot_subsystem::{ - errors::{ChainApiError, RuntimeApiError}, - jaeger, - messages::AllMessages, - messages::AvailabilityDistributionMessage, - ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, Subsystem, - SubsystemContext, SubsystemError, + messages::AvailabilityDistributionMessage, FromOverseer, OverseerSignal, SpawnedSubsystem, + Subsystem, SubsystemContext, SubsystemError, }; /// Error and 
[`Result`] type for this subsystem. diff --git a/node/network/availability-distribution/src/requester.rs b/node/network/availability-distribution/src/requester.rs index 58237f8deb9b..c1597bf295cc 100644 --- a/node/network/availability-distribution/src/requester.rs +++ b/node/network/availability-distribution/src/requester.rs @@ -26,28 +26,20 @@ use std::pin::Pin; use std::sync::Arc; use futures::{ - channel::{mpsc, oneshot}, + channel::mpsc, task::{Context, Poll}, - Stream, StreamExt, + Stream, }; -use jaeger::JaegerSpan; use sp_keystore::SyncCryptoStorePtr; use polkadot_node_subsystem_util::request_availability_cores_ctx; -use polkadot_primitives::v1::{ - BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, - OccupiedCore, SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, -}; +use polkadot_primitives::v1::{CandidateHash, CoreState, Hash, OccupiedCore}; use polkadot_subsystem::{ - errors::{ChainApiError, RuntimeApiError}, - jaeger, - messages::{AllMessages, AvailabilityDistributionMessage}, - ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, Subsystem, - SubsystemContext, SubsystemError, + messages::AllMessages, ActiveLeavesUpdate, JaegerSpan, SubsystemContext, }; -use super::{error::recv_runtime, session_cache::SessionCache, Result, LOG_TARGET}; +use super::{error::recv_runtime, session_cache::SessionCache, Result}; /// A task fetching a particular chunk. mod fetch_task; @@ -134,7 +126,7 @@ impl Requester { /// fn stop_requesting_chunks(&mut self, obsolete_leaves: impl Iterator) { let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().collect(); - self.fetches.retain(|&c_hash, task| { + self.fetches.retain(|_, task| { task.remove_leaves(&obsolete_leaves); task.is_live() }) diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 6626316c455f..9aa4ace7bc10 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -15,8 +15,6 @@ // along with Polkadot. If not, see . 
use std::collections::HashSet; -use std::pin::Pin; -use std::rc::Rc; use futures::channel::mpsc; use futures::channel::oneshot; @@ -29,19 +27,13 @@ use polkadot_node_network_protocol::request_response::{ v1::{AvailabilityFetchingRequest, AvailabilityFetchingResponse}, }; use polkadot_primitives::v1::{ - AuthorityDiscoveryId, BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, - ErasureChunk, GroupIndex, Hash, HashT, OccupiedCore, SessionIndex, ValidatorId, - ValidatorIndex, PARACHAIN_KEY_TYPE_ID, + AuthorityDiscoveryId, BlakeTwo256, ErasureChunk, GroupIndex, Hash, HashT, OccupiedCore, + SessionIndex, }; use polkadot_subsystem::messages::{ - AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage, - NetworkBridgeEvent, NetworkBridgeMessage, RuntimeApiMessage, RuntimeApiRequest, -}; -use polkadot_subsystem::{ - errors::{ChainApiError, RuntimeApiError}, - jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, - Subsystem, SubsystemContext, SubsystemError, SubsystemResult, + AllMessages, AvailabilityStoreMessage, NetworkBridgeMessage, }; +use polkadot_subsystem::SubsystemContext; use crate::{ error::{Error, Result}, @@ -360,6 +352,9 @@ impl RunningTask { }, ))) .await; + if let Err(err) = r { + tracing::error!(target: LOG_TARGET, err= ?err, "Storing erasure chunk failed, system shutting down?"); + } if let Err(oneshot::Canceled) = rx.await { tracing::error!(target: LOG_TARGET, "Storing erasure chunk failed"); diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index fd4af299a5de..cb2293fe8614 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -14,8 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use std::collections::{HashMap, HashSet}; -use std::rc::{Rc, Weak}; +use std::collections::HashSet; use lru::LruCache; use rand::{seq::SliceRandom, thread_rng}; @@ -29,18 +28,13 @@ use polkadot_node_subsystem_util::{ }; use polkadot_primitives::v1::SessionInfo as GlobalSessionInfo; use polkadot_primitives::v1::{ - AuthorityDiscoveryId, BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, - ErasureChunk, GroupIndex, Hash, HashT, SessionIndex, ValidatorId, ValidatorIndex, - PARACHAIN_KEY_TYPE_ID, -}; -use polkadot_subsystem::{ - jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, - Subsystem, SubsystemContext, SubsystemError, + AuthorityDiscoveryId, GroupIndex, Hash, SessionIndex, ValidatorId, ValidatorIndex, }; +use polkadot_subsystem::SubsystemContext; use super::{ error::{recv_runtime, Result}, - Error, LOG_TARGET, + Error, }; /// Caching of session info as needed by availability distribution. diff --git a/node/network/protocol/src/request_response/v1.rs b/node/network/protocol/src/request_response/v1.rs index 53e58f6b48b1..24e6363b963d 100644 --- a/node/network/protocol/src/request_response/v1.rs +++ b/node/network/protocol/src/request_response/v1.rs @@ -26,7 +26,9 @@ use super::Protocol; /// Request an availability chunk. #[derive(Debug, Copy, Clone, Encode, Decode)] pub struct AvailabilityFetchingRequest { + /// Hash of candidate we want a chunk for. pub candidate_hash: CandidateHash, + /// The index of the chunk to fetch. 
pub index: ValidatorIndex, } diff --git a/runtime/parachains/src/inclusion.rs b/runtime/parachains/src/inclusion.rs index 68a0b6f01f1e..7329ff2e65be 100644 --- a/runtime/parachains/src/inclusion.rs +++ b/runtime/parachains/src/inclusion.rs @@ -297,7 +297,7 @@ impl Module { Error::::UnoccupiedBitInBitfield, ); - let validator_public = &validators[signed_bitfield.validator_index() as usize]; + let validator_public = &validators[signed_bitfield.validator_index().0 as usize]; signed_bitfield.check_signature( &signing_context, @@ -319,7 +319,7 @@ impl Module { // defensive check - this is constructed by loading the availability bitfield record, // which is always `Some` if the core is occupied - that's why we're here. - let val_idx = signed_bitfield.validator_index() as usize; + let val_idx = signed_bitfield.validator_index().0 as usize; if let Some(mut bit) = pending_availability.as_mut() .and_then(|r| r.availability_votes.get_mut(val_idx)) { @@ -532,7 +532,7 @@ impl Module { &signing_context, group_vals.len(), |idx| group_vals.get(idx) - .and_then(|i| validators.get(*i as usize)) + .and_then(|i| validators.get(*i.0 as usize)) .map(|v| v.clone()), ); From d683f102b0a156c0f888541488d246140319f668 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 18 Feb 2021 22:24:40 +0100 Subject: [PATCH 23/60] Remove unused functions - FetchTask::is_finished - SessionCache::fetch_session_info --- .../src/requester/fetch_task.rs | 10 ---------- .../src/session_cache.rs | 15 --------------- 2 files changed, 25 deletions(-) diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 9aa4ace7bc10..f228965ba17a 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -194,16 +194,6 @@ impl FetchTask { } } - /// Whether or not this task can be considered finished. - /// - /// That is, it is either canceled, succeeded or failed. - pub fn is_finished(&self) -> bool { - match &self.state { - FetchedState::Canceled => true, - FetchedState::Started(sender) => sender.is_canceled(), - } - } - /// Whether or not there are still relay parents around with this candidate pending /// availability. pub fn is_live(&self) -> bool { diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index cb2293fe8614..111459a542e0 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -99,21 +99,6 @@ impl SessionCache { keystore, } } - /// Retrieve session info for the given relay parent. - /// - /// This function will query the cache first and will only query the runtime on cache miss. - /// - /// Returns: `Ok(None)` in case this node is not a validator in the current session. - pub async fn fetch_session_info( - &mut self, - ctx: &mut Context, - parent: Hash, - ) -> Result> - where - Context: SubsystemContext, - { - self.with_session_info(ctx, parent, Clone::clone).await - } /// Tries to retrieve `SessionInfo` and calls `with_info` if successful. /// From d7a8a312d1b53c91df9c22b6e6166748bd2ae825 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 08:31:13 +0100 Subject: [PATCH 24/60] availability-distribution responding side. 
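
The responding side added in this patch is deliberately small: look the requested chunk up in the availability store and answer with either the chunk or `NoSuchChunk`, so the requester can move on to the next validator in the backing group. A minimal sketch of that flow with simplified stand-in types (the real code queries the av-store subsystem rather than a map):

    use std::collections::HashMap;

    use futures::channel::oneshot;

    type CandidateHash = [u8; 32]; // stand-in
    type ValidatorIndex = u32;     // stand-in

    enum FetchingResponse {
        Chunk(Vec<u8>),
        NoSuchChunk,
    }

    struct IncomingRequest {
        candidate_hash: CandidateHash,
        index: ValidatorIndex,
        response_sender: oneshot::Sender<FetchingResponse>,
    }

    fn answer_request(
        store: &HashMap<(CandidateHash, ValidatorIndex), Vec<u8>>,
        req: IncomingRequest,
    ) -> Result<(), &'static str> {
        let response = match store.get(&(req.candidate_hash, req.index)) {
            Some(chunk) => FetchingResponse::Chunk(chunk.clone()),
            None => FetchingResponse::NoSuchChunk,
        };
        // Failure here just means the requesting side gave up in the meantime.
        req.response_sender
            .send(response)
            .map_err(|_| "requester hung up before the response was sent")
    }

Answering with an explicit `NoSuchChunk` instead of dropping the request lets the requester treat the peer as unhelpful for this candidate and try the next validator immediately.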
--- .../availability-distribution/src/error.rs | 4 ++ .../availability-distribution/src/lib.rs | 12 ++-- .../src/requester/fetch_task.rs | 9 ++- .../src/responder.rs | 65 +++++++++++++++++++ .../protocol/src/request_response/v1.rs | 3 + 5 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 node/network/availability-distribution/src/responder.rs diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index d442f7e00686..658bad97f3ce 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -64,6 +64,10 @@ pub enum Error { /// Requester stream exhausted. #[error("Erasure chunk requester stream exhausted")] RequesterExhausted, + + /// Sending response failed. + #[error("Sending a request's response failed.")] + SendResponse, } pub type Result = std::result::Result; diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index c0a156a53134..9ee9661affe0 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -32,6 +32,10 @@ use error::Result; mod requester; use requester::Requester; +/// Responding to erasure chunk requests: +mod responder; +use responder::answer_request; + /// Cache for session information. mod session_cache; @@ -109,13 +113,9 @@ impl AvailabilityDistributionSubsystem { return Ok(()); } FromOverseer::Communication { - msg: AvailabilityDistributionMessage::AvailabilityFetchingRequest(_), + msg: AvailabilityDistributionMessage::AvailabilityFetchingRequest(req), } => { - // TODO: Implement issue 2306: - tracing::warn!( - target: LOG_TARGET, - "To be implemented, see: https://github.com/paritytech/polkadot/issues/2306!", - ); + answer_request(&mut ctx, req).await? } FromOverseer::Communication { msg: AvailabilityDistributionMessage::NetworkBridgeUpdateV1(_), diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index f228965ba17a..501a30abce70 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -234,17 +234,22 @@ impl RunningTask { target: LOG_TARGET, "Node seems to be shutting down, canceling fetch task" ); - return; + return } Err(TaskError::PeerError) => { bad_validators.push(validator); - continue; + continue } }; let chunk = match resp { AvailabilityFetchingResponse::Chunk(resp) => { resp.reconstruct_erasure_chunk(&self.request) } + AvailabilityFetchingResponse::NoSuchChunk => { + tracing::debug!(target: LOG_TARGET, validator = ?validator, "Validator did not have our chunk"); + bad_validators.push(validator); + continue + } }; // Data genuine? diff --git a/node/network/availability-distribution/src/responder.rs b/node/network/availability-distribution/src/responder.rs new file mode 100644 index 000000000000..23ec112030df --- /dev/null +++ b/node/network/availability-distribution/src/responder.rs @@ -0,0 +1,65 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+ +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! Responder answers requests for availability chunks. + +use futures::channel::oneshot; + +use polkadot_node_network_protocol::request_response::{request::IncomingRequest, v1}; +use polkadot_primitives::v1::{CandidateHash, ErasureChunk, ValidatorIndex}; +use polkadot_subsystem::{ + messages::{AllMessages, AvailabilityStoreMessage}, + SubsystemContext, +}; + +use crate::error::{Error, Result}; +use crate::LOG_TARGET; + +/// Answer an incoming chunk request by querying the av store. +pub async fn answer_request( + ctx: &mut Context, + req: IncomingRequest, +) -> Result<()> +where + Context: SubsystemContext, +{ + let chunk = query_chunk(ctx, req.payload.candidate_hash, req.payload.index).await?; + + let response = match chunk { + None => v1::AvailabilityFetchingResponse::NoSuchChunk, + Some(chunk) => v1::AvailabilityFetchingResponse::Chunk(chunk.into()), + }; + + req.send_response(response).map_err(|_| Error::SendResponse) +} + +#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] +async fn query_chunk( + ctx: &mut Context, + candidate_hash: CandidateHash, + validator_index: ValidatorIndex, +) -> Result> +where + Context: SubsystemContext, +{ + let (tx, rx) = oneshot::channel(); + ctx.send_message(AllMessages::AvailabilityStore( + AvailabilityStoreMessage::QueryChunk(candidate_hash, validator_index, tx), + )) + .await; + + rx.await.map_err(|e| Error::QueryChunkResponseChannel(e)) +} diff --git a/node/network/protocol/src/request_response/v1.rs b/node/network/protocol/src/request_response/v1.rs index 24e6363b963d..06e4ea522086 100644 --- a/node/network/protocol/src/request_response/v1.rs +++ b/node/network/protocol/src/request_response/v1.rs @@ -38,6 +38,9 @@ pub enum AvailabilityFetchingResponse { /// The requested chunk data. #[codec(index = 0)] Chunk(ChunkResponse), + /// Node was not in possession of the requested chunk. + #[codec(index = 1)] + NoSuchChunk, } /// Skimmed down variant of `ErasureChunk`. From 3fed607083ddd4cfea1ef43f2e8a58d2662f8752 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 08:31:32 +0100 Subject: [PATCH 25/60] Cleanup + Fixes. 
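
Part of this cleanup is deriving the needed traits for the `ValidatorIndex` newtype in the primitives (shown below); the follow-up commit then migrates call sites from `index as usize` to `index.0 as usize`. A small sketch of that pattern, where the `validator_key` helper is hypothetical and only illustrates the call-site change:

    // Transparent wrapper: indexing into validator sets now goes through `.0`.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct ValidatorIndex(pub u32);

    // Hypothetical helper showing the call-site pattern used across the tree.
    fn validator_key(keys: &[String], index: ValidatorIndex) -> Option<&String> {
        keys.get(index.0 as usize)
    }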
--- node/network/availability-distribution/src/session_cache.rs | 2 +- primitives/src/v0.rs | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 111459a542e0..2b40c3db7b2d 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -90,7 +90,7 @@ pub struct BadValidators { } impl SessionCache { - pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { + pub fn new(keystore: SyncCryptoStorePtr) -> Self { SessionCache { // 5 relatively conservative, 1 to 2 should suffice: session_index_cache: LruCache::new(5), diff --git a/primitives/src/v0.rs b/primitives/src/v0.rs index ecb9f9cf3e53..c56c02c46641 100644 --- a/primitives/src/v0.rs +++ b/primitives/src/v0.rs @@ -114,9 +114,12 @@ impl MallocSizeOf for ValidatorId { } /// Index of the validator is used as a lightweight replacement of the `ValidatorId` when appropriate. +#[cfg(not(feature = "std"))] #[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Copy, Clone, Encode, Decode)] +pub struct ValidatorIndex(pub u32); + #[cfg(feature = "std")] -#[derive(Debug, MallocSizeOf)] +#[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Copy, Clone, Encode, Decode, Debug, MallocSizeOf)] pub struct ValidatorIndex(pub u32); // We should really get https://github.com/paritytech/polkadot/issues/2403 going .. From 39d6bc2b50563a638d23f7b762d5bad601cb7536 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 08:53:41 +0100 Subject: [PATCH 26/60] More fixes. --- node/core/av-store/src/lib.rs | 2 +- node/core/backing/src/lib.rs | 4 ++-- node/core/provisioner/src/lib.rs | 2 +- node/network/availability-recovery/src/lib.rs | 2 +- node/network/bitfield-distribution/src/lib.rs | 6 +++--- node/network/collator-protocol/src/collator_side.rs | 4 ++-- node/network/pov-distribution/src/lib.rs | 2 +- node/network/statement-distribution/src/lib.rs | 2 +- primitives/src/v0.rs | 2 +- runtime/parachains/src/inclusion.rs | 10 +++++----- runtime/parachains/src/reward_points.rs | 2 +- runtime/parachains/src/scheduler.rs | 2 +- 12 files changed, 20 insertions(+), 20 deletions(-) diff --git a/node/core/av-store/src/lib.rs b/node/core/av-store/src/lib.rs index 0b4806b3157b..66846f7ecc88 100644 --- a/node/core/av-store/src/lib.rs +++ b/node/core/av-store/src/lib.rs @@ -968,7 +968,7 @@ fn process_message( AvailabilityStoreMessage::QueryChunkAvailability(candidate, validator_index, tx) => { let a = load_meta(&subsystem.db, &candidate)? 
.map_or(false, |m| - *m.chunks_stored.get(validator_index as usize).as_deref().unwrap_or(&false) + *m.chunks_stored.get(validator_index.0 as usize).as_deref().unwrap_or(&false) ); let _ = tx.send(a); } diff --git a/node/core/backing/src/lib.rs b/node/core/backing/src/lib.rs index 5a8a78369585..495493132ce3 100644 --- a/node/core/backing/src/lib.rs +++ b/node/core/backing/src/lib.rs @@ -861,7 +861,7 @@ impl CandidateBackingJob { #[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))] fn check_statement_signature(&self, statement: &SignedFullStatement) -> Result<(), Error> { - let idx = statement.validator_index() as usize; + let idx = statement.validator_index().0 as usize; if self.table_context.validators.len() > idx { statement.check_signature( @@ -902,7 +902,7 @@ impl CandidateBackingJob { ) -> Option { self.insert_or_get_unbacked_span(parent_span, hash).map(|span| { let mut span = span.child("import-statement"); - span.add_string_tag("validator-index", &format!("{}", validator)); + span.add_string_tag("validator-index", &format!("{:?}", validator)); span }) } diff --git a/node/core/provisioner/src/lib.rs b/node/core/provisioner/src/lib.rs index 8f9421b25f9d..df24f9ee5caf 100644 --- a/node/core/provisioner/src/lib.rs +++ b/node/core/provisioner/src/lib.rs @@ -507,7 +507,7 @@ fn bitfields_indicate_availability( let availability_len = availability.len(); for bitfield in bitfields { - let validator_idx = bitfield.validator_index() as usize; + let validator_idx = bitfield.validator_index().0 as usize; match availability.get_mut(validator_idx) { None => { // in principle, this function might return a `Result` so that we can more clearly express this error condition diff --git a/node/network/availability-recovery/src/lib.rs b/node/network/availability-recovery/src/lib.rs index 6b28c7295f0d..e000b03c620c 100644 --- a/node/network/availability-recovery/src/lib.rs +++ b/node/network/availability-recovery/src/lib.rs @@ -165,7 +165,7 @@ impl Interaction { let (tx, rx) = oneshot::channel(); self.to_state.send(FromInteraction::MakeRequest( - self.validator_authority_keys[validator_index as usize].clone(), + self.validator_authority_keys[validator_index.0 as usize].clone(), self.candidate_hash.clone(), validator_index, tx, diff --git a/node/network/bitfield-distribution/src/lib.rs b/node/network/bitfield-distribution/src/lib.rs index 1029acd6bb24..a3af301d2ffc 100644 --- a/node/network/bitfield-distribution/src/lib.rs +++ b/node/network/bitfield-distribution/src/lib.rs @@ -284,7 +284,7 @@ where return; } - let validator_index = signed_availability.validator_index() as usize; + let validator_index = signed_availability.validator_index().0 as usize; let validator = if let Some(validator) = validator_set.get(validator_index) { validator.clone() } else { @@ -410,7 +410,7 @@ where span.add_string_tag("peer-id", &origin.to_base58()); span.add_string_tag( "claimed-validator", - &message.signed_availability.validator_index().to_string(), + &message.signed_availability.validator_index().0.to_string(), ); span }; @@ -429,7 +429,7 @@ where // Use the (untrusted) validator index provided by the signed payload // and see if that one actually signed the availability bitset. 
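Most of the churn in these hunks is mechanical fallout of `ValidatorIndex` becoming a newtype around `u32`: every site that used the index to address a slice now goes through `.0 as usize`, and untrusted indices keep being bounds-checked with `get` rather than indexed directly. A stand-alone sketch of the pattern follows; the local `ValidatorIndex` is re-declared only so the snippet compiles on its own.

```rust
// Illustrative copy of the newtype and the `validators[idx.0 as usize]`
// pattern used throughout the diffs above.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ValidatorIndex(pub u32);

fn validator_name(validators: &[&str], idx: ValidatorIndex) -> Option<String> {
    // Indices arriving from the network are untrusted, so bounds-check with
    // `get` instead of indexing directly.
    validators.get(idx.0 as usize).map(|v| v.to_string())
}

fn main() {
    let validators = ["alice", "bob", "charlie"];
    assert_eq!(validator_name(&validators, ValidatorIndex(1)), Some("bob".to_string()));
    assert_eq!(validator_name(&validators, ValidatorIndex(9)), None);
}
```

The wrapper costs nothing at runtime, but it keeps validator indices from being confused with other raw `u32` values such as group or core indices.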
let signing_context = job_data.signing_context.clone(); - let validator_index = message.signed_availability.validator_index() as usize; + let validator_index = message.signed_availability.validator_index().0 as usize; let validator = if let Some(validator) = validator_set.get(validator_index) { validator.clone() } else { diff --git a/node/network/collator-protocol/src/collator_side.rs b/node/network/collator-protocol/src/collator_side.rs index afd3bc1a4953..1689ed12c719 100644 --- a/node/network/collator-protocol/src/collator_side.rs +++ b/node/network/collator-protocol/src/collator_side.rs @@ -329,8 +329,8 @@ async fn determine_our_validators( let validators = request_validators_ctx(relay_parent, ctx).await?.await??; - let current_validators = current_validators.iter().map(|i| validators[*i as usize].clone()).collect(); - let next_validators = next_validators.iter().map(|i| validators[*i as usize].clone()).collect(); + let current_validators = current_validators.iter().map(|i| validators[i.0 as usize].clone()).collect(); + let next_validators = next_validators.iter().map(|i| validators[i.0 as usize].clone()).collect(); Ok((current_validators, next_validators)) } diff --git a/node/network/pov-distribution/src/lib.rs b/node/network/pov-distribution/src/lib.rs index 6527be99c3d5..d5043cb5b36a 100644 --- a/node/network/pov-distribution/src/lib.rs +++ b/node/network/pov-distribution/src/lib.rs @@ -336,7 +336,7 @@ async fn determine_validators_for_core( let validators = connect_to_validators .into_iter() - .map(|idx| validators[idx as usize].clone()) + .map(|idx| validators[idx.0 as usize].clone()) .collect(); Ok(Some(validators)) diff --git a/node/network/statement-distribution/src/lib.rs b/node/network/statement-distribution/src/lib.rs index 960ff129bacf..1eee14892ab7 100644 --- a/node/network/statement-distribution/src/lib.rs +++ b/node/network/statement-distribution/src/lib.rs @@ -494,7 +494,7 @@ fn check_statement_signature( parent_hash: relay_parent, }; - head.validators.get(statement.validator_index() as usize) + head.validators.get(statement.validator_index().0 as usize) .ok_or(()) .and_then(|v| statement.check_signature(&signing_context, v)) } diff --git a/primitives/src/v0.rs b/primitives/src/v0.rs index c56c02c46641..16e7bf88578d 100644 --- a/primitives/src/v0.rs +++ b/primitives/src/v0.rs @@ -670,7 +670,7 @@ pub struct AvailableData { } /// A chunk of erasure-encoded block data. -#[derive(PartialEq, Eq, Clone, Encode, Decode, Default)] +#[derive(PartialEq, Eq, Clone, Encode, Decode)] #[cfg_attr(feature = "std", derive(Serialize, Deserialize, Debug, Hash))] pub struct ErasureChunk { /// The erasure-encoded chunk of data belonging to the candidate block. 
diff --git a/runtime/parachains/src/inclusion.rs b/runtime/parachains/src/inclusion.rs index 7329ff2e65be..02d29aa2e1b3 100644 --- a/runtime/parachains/src/inclusion.rs +++ b/runtime/parachains/src/inclusion.rs @@ -288,7 +288,7 @@ impl Module { ); ensure!( - signed_bitfield.validator_index() < validators.len() as ValidatorIndex, + (signed_bitfield.validator_index().0 as usize) < validators.len(), Error::::ValidatorIndexOutOfBounds, ); @@ -532,7 +532,7 @@ impl Module { &signing_context, group_vals.len(), |idx| group_vals.get(idx) - .and_then(|i| validators.get(*i.0 as usize)) + .and_then(|i| validators.get(i.0 as usize)) .map(|v| v.clone()), ); @@ -551,7 +551,7 @@ impl Module { let val_idx = group_vals.get(bit_idx) .expect("this query done above; qed"); - backers.set(*val_idx as _, true); + backers.set(val_idx.0 as _, true); } } @@ -658,12 +658,12 @@ impl Module { T::RewardValidators::reward_backing(backers.iter().enumerate() .filter(|(_, backed)| **backed) - .map(|(i, _)| i as _) + .map(|(i, _)| ValidatorIndex(i as _)) ); T::RewardValidators::reward_bitfields(availability_votes.iter().enumerate() .filter(|(_, voted)| **voted) - .map(|(i, _)| i as _) + .map(|(i, _)| ValidatorIndex(i as _)) ); // initial weight is config read. diff --git a/runtime/parachains/src/reward_points.rs b/runtime/parachains/src/reward_points.rs index 7ff208d6d132..3fb8435e0916 100644 --- a/runtime/parachains/src/reward_points.rs +++ b/runtime/parachains/src/reward_points.rs @@ -38,7 +38,7 @@ fn reward_by_indices(points: u32, indices: I) where // and we are rewarding for behavior in current session. let validators = C::SessionInterface::validators(); let rewards = indices.into_iter() - .filter_map(|i| validators.get(i as usize).map(|v| v.clone())) + .filter_map(|i| validators.get(i.0 as usize).map(|v| v.clone())) .map(|v| (v, points)); >::reward_by_ids(rewards); diff --git a/runtime/parachains/src/scheduler.rs b/runtime/parachains/src/scheduler.rs index f21f6646d4cf..1436fca0dea3 100644 --- a/runtime/parachains/src/scheduler.rs +++ b/runtime/parachains/src/scheduler.rs @@ -263,7 +263,7 @@ impl Module { let mut shuffled_indices: Vec<_> = (0..validators.len()) .enumerate() - .map(|(i, _)| i as ValidatorIndex) + .map(|(i, _)| ValidatorIndex(i as _)) .collect(); shuffled_indices.shuffle(&mut rng); From 49b176469646e683f04a76f6b625fadab3877c92 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 09:21:29 +0100 Subject: [PATCH 27/60] More fixes. adder-collator is running! --- node/core/av-store/src/lib.rs | 8 ++++---- .../src/requester/fetch_task.rs | 2 +- node/network/availability-recovery/src/lib.rs | 6 +++--- node/network/protocol/src/request_response/v1.rs | 2 +- primitives/src/v0.rs | 10 +++------- 5 files changed, 12 insertions(+), 16 deletions(-) diff --git a/node/core/av-store/src/lib.rs b/node/core/av-store/src/lib.rs index 66846f7ecc88..2b29f9426689 100644 --- a/node/core/av-store/src/lib.rs +++ b/node/core/av-store/src/lib.rs @@ -1034,10 +1034,10 @@ fn store_chunk( None => return Ok(false), // we weren't informed of this candidate by import events. }; - match meta.chunks_stored.get(chunk.index as usize).map(|b| *b) { + match meta.chunks_stored.get(chunk.index.0 as usize).map(|b| *b) { Some(true) => return Ok(true), // already stored. 
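In the `inclusion.rs` hunks above, bitfields of votes are turned into reward lists by enumerating the bits and wrapping each set position into `ValidatorIndex`; the same enumerate-and-wrap pattern recurs in `scheduler.rs`. A self-contained sketch of that conversion (local `ValidatorIndex` and invented function name, mirroring the `reward_backing` / `reward_bitfields` call sites):

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ValidatorIndex(pub u32);

// Turn a bitfield of votes into the list of validator indices to reward.
fn voters(bits: &[bool]) -> Vec<ValidatorIndex> {
    bits.iter()
        .enumerate()
        .filter(|(_, voted)| **voted)
        .map(|(i, _)| ValidatorIndex(i as u32))
        .collect()
}

fn main() {
    let availability_votes = [true, false, true, true];
    assert_eq!(
        voters(&availability_votes),
        vec![ValidatorIndex(0), ValidatorIndex(2), ValidatorIndex(3)]
    );
}
```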
Some(false) => { - meta.chunks_stored.set(chunk.index as usize, true); + meta.chunks_stored.set(chunk.index.0 as usize, true); write_chunk(&mut tx, &candidate_hash, chunk.index, &chunk); write_meta(&mut tx, &candidate_hash, &meta); @@ -1090,7 +1090,7 @@ fn store_available_data( .map(|(index, (chunk, proof))| ErasureChunk { chunk: chunk.clone(), proof, - index: index as u32, + index: ValidatorIndex(index as u32), }); for chunk in erasure_chunks { @@ -1135,7 +1135,7 @@ fn prune_all(db: &Arc, clock: &dyn Clock) -> Result<(), Error> { // delete chunks. for (i, b) in meta.chunks_stored.iter().enumerate() { if *b { - delete_chunk(&mut tx, &candidate_hash, i as _); + delete_chunk(&mut tx, &candidate_hash, ValidatorIndex(i as _)); } } diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 501a30abce70..aee546da9b9b 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -312,7 +312,7 @@ impl RunningTask { fn validate_chunk(&self, validator: &AuthorityDiscoveryId, chunk: &ErasureChunk) -> bool { let anticipated_hash = - match branch_hash(&self.erasure_root, &chunk.proof, chunk.index as usize) { + match branch_hash(&self.erasure_root, &chunk.proof, chunk.index.0 as usize) { Ok(hash) => hash, Err(e) => { tracing::trace!( diff --git a/node/network/availability-recovery/src/lib.rs b/node/network/availability-recovery/src/lib.rs index e000b03c620c..5790f00b565c 100644 --- a/node/network/availability-recovery/src/lib.rs +++ b/node/network/availability-recovery/src/lib.rs @@ -195,7 +195,7 @@ impl Interaction { if let Ok(anticipated_hash) = branch_hash( &self.erasure_root, &chunk.proof, - chunk.index as usize, + chunk.index.0 as usize, ) { let erasure_chunk_hash = BlakeTwo256::hash(&chunk.chunk); @@ -269,7 +269,7 @@ impl Interaction { if self.received_chunks.len() >= self.threshold { let concluded = match polkadot_erasure_coding::reconstruct_v1( self.validators.len(), - self.received_chunks.values().map(|c| (&c.chunk[..], c.index as usize)), + self.received_chunks.values().map(|c| (&c.chunk[..], c.index.0 as usize)), ) { Ok(data) => { if reconstructed_data_matches_root(self.validators.len(), &self.erasure_root, &data) { @@ -423,7 +423,7 @@ async fn launch_interaction( let erasure_root = receipt.descriptor.erasure_root; let validators = session_info.validators.clone(); let validator_authority_keys = session_info.discovery_keys.clone(); - let mut shuffling: Vec<_> = (0..validators.len() as ValidatorIndex).collect(); + let mut shuffling: Vec<_> = (0..validators.len() as u32).map(ValidatorIndex).collect(); state.interactions.insert( candidate_hash.clone(), diff --git a/node/network/protocol/src/request_response/v1.rs b/node/network/protocol/src/request_response/v1.rs index 06e4ea522086..22724c1f44b7 100644 --- a/node/network/protocol/src/request_response/v1.rs +++ b/node/network/protocol/src/request_response/v1.rs @@ -69,7 +69,7 @@ impl ChunkResponse { ErasureChunk { chunk: self.chunk, proof: self.proof, - index: req.index.0, + index: req.index, } } } diff --git a/primitives/src/v0.rs b/primitives/src/v0.rs index 16e7bf88578d..4259b1269c15 100644 --- a/primitives/src/v0.rs +++ b/primitives/src/v0.rs @@ -114,12 +114,8 @@ impl MallocSizeOf for ValidatorId { } /// Index of the validator is used as a lightweight replacement of the `ValidatorId` when appropriate. 
-#[cfg(not(feature = "std"))] -#[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Copy, Clone, Encode, Decode)] -pub struct ValidatorIndex(pub u32); - -#[cfg(feature = "std")] -#[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Copy, Clone, Encode, Decode, Debug, MallocSizeOf)] +#[derive(Eq, Ord, PartialEq, PartialOrd, Copy, Clone, Encode, Decode)] +#[cfg_attr(feature = "std", derive(Serialize, Deserialize, Debug, Hash, MallocSizeOf))] pub struct ValidatorIndex(pub u32); // We should really get https://github.com/paritytech/polkadot/issues/2403 going .. @@ -676,7 +672,7 @@ pub struct ErasureChunk { /// The erasure-encoded chunk of data belonging to the candidate block. pub chunk: Vec, /// The index of this erasure-encoded chunk of data. - pub index: u32, + pub index: ValidatorIndex, /// Proof for this chunk's branch in the Merkle tree. pub proof: Vec>, } From a1413301d5c2113f2f82697cd3aaedafdfe4ba9e Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 09:42:18 +0100 Subject: [PATCH 28/60] Some docs. --- node/network/availability-distribution/src/error.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index 658bad97f3ce..f66d418d8e35 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -15,6 +15,8 @@ // along with Polkadot. If not, see . // +//! Error handling related code and Error/Result definitions. + use thiserror::Error; use futures::channel::oneshot; @@ -23,6 +25,7 @@ use polkadot_node_subsystem_util::Error as UtilError; use polkadot_primitives::v1::SessionIndex; use polkadot_subsystem::{errors::RuntimeApiError, SubsystemError}; +/// Errors of this subsystem. #[derive(Debug, Error)] pub enum Error { #[error("Response channel to obtain StoreChunk failed")] From fad4586deaa827990abc077d46fd0333481c4bda Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 10:40:06 +0100 Subject: [PATCH 29/60] Docs. --- .../network/availability-distribution/src/lib.rs | 4 +++- .../src/requester/fetch_task.rs | 16 +++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 9ee9661affe0..ab68a899582b 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -45,7 +45,7 @@ const LOG_TARGET: &'static str = "availability_distribution"; /// TODO: Dummy for now. type Metrics = (); -/// The bitfield distribution subsystem. +/// The availability distribution subsystem. pub struct AvailabilityDistributionSubsystem { /// Pointer to a keystore, which is required for determining this nodes validator index. keystore: SyncCryptoStorePtr, @@ -90,6 +90,8 @@ impl AvailabilityDistributionSubsystem { from_task = state.next() => Either::Right(from_task), } }; + + // Handle task messages sending: let message = match action { Either::Left(subsystem_msg) => { subsystem_msg.map_err(|e| Error::IncomingMessageChannel(e))? 
diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index aee546da9b9b..7c21bfaf3a34 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -152,6 +152,8 @@ impl FetchTaskConfig { impl FetchTask { /// Start fetching a chunk. + /// + /// A task handling the fetching of the configured chunk will be spawned. pub async fn start(config: FetchTaskConfig, ctx: &mut Context) -> Result where Context: SubsystemContext, @@ -181,6 +183,8 @@ impl FetchTask { } /// Add the given leaf to the relay parents which are making this task relevant. + /// + /// This is for book keeping, so we know we are already fetching a chunk. pub fn add_leaf(&mut self, leaf: Hash) { self.live_in.insert(leaf); } @@ -189,7 +193,7 @@ impl FetchTask { /// fetching. pub fn remove_leaves(&mut self, leaves: &HashSet) { self.live_in.difference(leaves); - if self.live_in.is_empty() { + if self.live_in.is_empty() && !self.is_finished() { self.state = FetchedState::Canceled } } @@ -199,6 +203,16 @@ impl FetchTask { pub fn is_live(&self) -> bool { !self.live_in.is_empty() } + + /// Whether or not this task can be considered finished. + /// + /// That is, it is either canceled, succeeded or failed. + pub fn is_finished(&self) -> bool { + match &self.state { + FetchedState::Canceled => true, + FetchedState::Started(sender) => sender.is_canceled(), + } + } } /// Things that can go wrong in task execution. From e617e91bd6d9660c13342b3316e1539ce86912df Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 13:00:06 +0100 Subject: [PATCH 30/60] Fix reporting of bad guys. --- .../src/requester/fetch_task.rs | 2 +- .../availability-distribution/src/session_cache.rs | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 7c21bfaf3a34..f19dc98ba743 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -238,7 +238,7 @@ impl RunningTask { /// Try validators in backing group in order. async fn run_inner(mut self) { let mut bad_validators = Vec::new(); - // Try validators in order: + // Try validators in reverse order: while let Some(validator) = self.group.pop() { // Send request: let resp = match self.do_request(&validator).await { diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 2b40c3db7b2d..1a5c2e2eee02 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -143,6 +143,9 @@ impl SessionCache { } /// Make sure we try unresponsive or misbehaving validators last. + /// + /// We assume validators in a group are tried in reverse order, so the reported bad validators + /// will be put at the beginning of the group. 
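As the new doc comment above states, fetch tasks pop validators from the back of the backing group, so the bad-validator handling (implemented in `report_bad`, which follows) removes the reported validators from their current positions and prepends them, where `pop()` only reaches them last. A self-contained sketch of that reordering, with plain integers standing in for the real authority IDs:

```rust
use std::collections::HashSet;

// Move unresponsive validators to the front of the group so that a task
// popping from the back only falls back to them once everyone else failed.
fn deprioritize_bad(group: &mut Vec<u32>, bad: Vec<u32>) {
    let bad_set: HashSet<u32> = bad.iter().copied().collect();
    // Drop the bad validators from their current positions ...
    group.retain(|v| !bad_set.contains(v));
    // ... and prepend them, so `pop()` reaches them last.
    let mut reordered = bad;
    reordered.append(group);
    *group = reordered;
}

fn main() {
    let mut group = vec![10, 11, 12, 13];
    deprioritize_bad(&mut group, vec![11, 13]);
    assert_eq!(group, vec![11, 13, 10, 12]);
    // Popping now yields 12 and 10 first; 13 and 11 only as a last resort.
}
```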
pub fn report_bad(&mut self, mut report: BadValidators) -> Result<()> { let session = self .session_info_cache @@ -153,9 +156,14 @@ impl SessionCache { .get_mut(report.group_index.0 as usize) .ok_or(Error::ReportBadValidators("Validator group not found"))?; let bad_set = report.bad_validators.iter().collect::>(); - // Put the bad boys last: + + // Get rid of bad boys: group.retain(|v| !bad_set.contains(v)); - group.append(&mut report.bad_validators); + + // We are trying validators in reverse order, so bad ones should be first: + let mut new_group = report.bad_validators; + new_group.append(group); + *group = new_group; Ok(()) } From a4eef9b249d6a25dca52d41232ba5a4936038a5f Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 19:20:07 +0100 Subject: [PATCH 31/60] Fix tests --- node/core/bitfield-signing/src/lib.rs | 2 +- node/core/provisioner/src/tests.rs | 26 ++++---- .../src/session_cache.rs | 2 +- node/network/pov-distribution/src/tests.rs | 8 +-- .../network/statement-distribution/src/lib.rs | 60 +++++++++---------- runtime/parachains/src/inclusion.rs | 36 +++++------ 6 files changed, 67 insertions(+), 67 deletions(-) diff --git a/node/core/bitfield-signing/src/lib.rs b/node/core/bitfield-signing/src/lib.rs index f337db40914f..bd194742acbc 100644 --- a/node/core/bitfield-signing/src/lib.rs +++ b/node/core/bitfield-signing/src/lib.rs @@ -317,7 +317,7 @@ mod tests { block_on(async move { let (mut sender, mut receiver) = mpsc::channel(10); let relay_parent = Hash::default(); - let validator_index = 1u32; + let validator_index = ValidatorIndex(1u32); let future = construct_availability_bitfield( relay_parent, diff --git a/node/core/provisioner/src/tests.rs b/node/core/provisioner/src/tests.rs index 40a1c51e1ad0..8f26e6ee2f09 100644 --- a/node/core/provisioner/src/tests.rs +++ b/node/core/provisioner/src/tests.rs @@ -78,9 +78,9 @@ mod select_availability_bitfields { // we pass in three bitfields with two validators // this helps us check the postcondition that we get two bitfields back, for which the validators differ let bitfields = vec![ - block_on(signed_bitfield(&keystore, bitvec.clone(), 0)), - block_on(signed_bitfield(&keystore, bitvec.clone(), 1)), - block_on(signed_bitfield(&keystore, bitvec, 1)), + block_on(signed_bitfield(&keystore, bitvec.clone(), ValidatorIndex(0))), + block_on(signed_bitfield(&keystore, bitvec.clone(), ValidatorIndex(1))), + block_on(signed_bitfield(&keystore, bitvec, ValidatorIndex(1))), ]; let mut selected_bitfields = select_availability_bitfields(&cores, &bitfields); @@ -116,9 +116,9 @@ mod select_availability_bitfields { ]; let bitfields = vec![ - block_on(signed_bitfield(&keystore, bitvec0, 0)), - block_on(signed_bitfield(&keystore, bitvec1, 1)), - block_on(signed_bitfield(&keystore, bitvec2.clone(), 2)), + block_on(signed_bitfield(&keystore, bitvec0, ValidatorIndex(0))), + block_on(signed_bitfield(&keystore, bitvec1, ValidatorIndex(1))), + block_on(signed_bitfield(&keystore, bitvec2.clone(), ValidatorIndex(2))), ]; let selected_bitfields = select_availability_bitfields(&cores, &bitfields); @@ -140,8 +140,8 @@ mod select_availability_bitfields { let cores = vec![occupied_core(0), occupied_core(1)]; let bitfields = vec![ - block_on(signed_bitfield(&keystore, bitvec, 1)), - block_on(signed_bitfield(&keystore, bitvec1.clone(), 1)), + block_on(signed_bitfield(&keystore, bitvec, ValidatorIndex(1))), + block_on(signed_bitfield(&keystore, bitvec1.clone(), ValidatorIndex(1))), ]; let selected_bitfields = select_availability_bitfields(&cores, 
&bitfields); @@ -174,11 +174,11 @@ mod select_availability_bitfields { // these are out of order but will be selected in order. The better // bitfield for 3 will be selected. let bitfields = vec![ - block_on(signed_bitfield(&keystore, bitvec2.clone(), 3)), - block_on(signed_bitfield(&keystore, bitvec3.clone(), 3)), - block_on(signed_bitfield(&keystore, bitvec0.clone(), 0)), - block_on(signed_bitfield(&keystore, bitvec2.clone(), 2)), - block_on(signed_bitfield(&keystore, bitvec1.clone(), 1)), + block_on(signed_bitfield(&keystore, bitvec2.clone(), ValidatorIndex(3))), + block_on(signed_bitfield(&keystore, bitvec3.clone(), ValidatorIndex(3))), + block_on(signed_bitfield(&keystore, bitvec0.clone(), ValidatorIndex(0))), + block_on(signed_bitfield(&keystore, bitvec2.clone(), ValidatorIndex(2))), + block_on(signed_bitfield(&keystore, bitvec1.clone(), ValidatorIndex(1))), ]; let selected_bitfields = select_availability_bitfields(&cores, &bitfields); diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 1a5c2e2eee02..874671fbefcb 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -146,7 +146,7 @@ impl SessionCache { /// /// We assume validators in a group are tried in reverse order, so the reported bad validators /// will be put at the beginning of the group. - pub fn report_bad(&mut self, mut report: BadValidators) -> Result<()> { + pub fn report_bad(&mut self, report: BadValidators) -> Result<()> { let session = self .session_info_cache .get_mut(&report.session_index) diff --git a/node/network/pov-distribution/src/tests.rs b/node/network/pov-distribution/src/tests.rs index 8cf37dfad878..2dfc0ce11f96 100644 --- a/node/network/pov-distribution/src/tests.rs +++ b/node/network/pov-distribution/src/tests.rs @@ -174,7 +174,7 @@ impl Default for TestState { .take(validator_public.len()) .collect(); - let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]]; + let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]].into_iter().map(|g| g.map(ValidatorIndex)).collect(); let group_rotation_info = GroupRotationInfo { session_start_block: 0, group_rotation_frequency: 100, @@ -238,11 +238,11 @@ async fn test_validator_discovery( assert_eq!(index, session_index); let validators = validator_group.iter() - .map(|idx| validator_ids[*idx as usize].clone()) + .map(|idx| validator_ids[idx.0 as usize].clone()) .collect(); let discovery_keys = validator_group.iter() - .map(|idx| discovery_ids[*idx as usize].clone()) + .map(|idx| discovery_ids[idx.0 as usize].clone()) .collect(); tx.send(Ok(Some(SessionInfo { @@ -737,7 +737,7 @@ fn we_inform_peers_with_same_view_we_are_awaiting() { .take(validators.len()) .collect(); - let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]]; + let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]].map(|g| g.map(ValidatorIndex)).collect(); let group_rotation_info = GroupRotationInfo { session_start_block: 0, group_rotation_frequency: 100, diff --git a/node/network/statement-distribution/src/lib.rs b/node/network/statement-distribution/src/lib.rs index 7d79f54b5b89..fdeb7e0c00b2 100644 --- a/node/network/statement-distribution/src/lib.rs +++ b/node/network/statement-distribution/src/lib.rs @@ -1133,7 +1133,7 @@ mod tests { &keystore, Statement::Seconded(candidate_a.clone()), &signing_context, - 0, + ValidatorIndex(0), &alice_public.into(), )).expect("should be signed"); let noted = 
head_data.note_statement(a_seconded_val_0.clone()); @@ -1150,7 +1150,7 @@ mod tests { &keystore, Statement::Seconded(candidate_b.clone()), &signing_context, - 0, + ValidatorIndex(0), &alice_public.into(), )).expect("should be signed")); @@ -1161,7 +1161,7 @@ mod tests { &keystore, Statement::Seconded(candidate_c.clone()), &signing_context, - 0, + ValidatorIndex(0), &alice_public.into(), )).expect("should be signed")); @@ -1172,7 +1172,7 @@ mod tests { &keystore, Statement::Seconded(candidate_b.clone()), &signing_context, - 1, + ValidatorIndex(1), &bob_public.into(), )).expect("should be signed")); @@ -1183,7 +1183,7 @@ mod tests { &keystore, Statement::Seconded(candidate_c.clone()), &signing_context, - 1, + ValidatorIndex(1), &bob_public.into(), )).expect("should be signed")); @@ -1233,7 +1233,7 @@ mod tests { let hash_a = CandidateHash([1; 32].into()); // Sending an un-pinned statement should not work and should have no effect. - assert!(knowledge.send(&(CompactStatement::Valid(hash_a), 0)).is_none()); + assert!(knowledge.send(&(CompactStatement::Valid(hash_a), ValidatorIndex(0))).is_none()); assert!(!knowledge.known_candidates.contains(&hash_a)); assert!(knowledge.sent_statements.is_empty()); assert!(knowledge.received_statements.is_empty()); @@ -1241,8 +1241,8 @@ mod tests { assert!(knowledge.received_message_count.is_empty()); // Make the peer aware of the candidate. - assert_eq!(knowledge.send(&(CompactStatement::Candidate(hash_a), 0)), Some(true)); - assert_eq!(knowledge.send(&(CompactStatement::Candidate(hash_a), 1)), Some(false)); + assert_eq!(knowledge.send(&(CompactStatement::Candidate(hash_a), ValidatorIndex(0))), Some(true)); + assert_eq!(knowledge.send(&(CompactStatement::Candidate(hash_a), ValidatorIndex(1))), Some(false)); assert!(knowledge.known_candidates.contains(&hash_a)); assert_eq!(knowledge.sent_statements.len(), 2); assert!(knowledge.received_statements.is_empty()); @@ -1250,7 +1250,7 @@ mod tests { assert!(knowledge.received_message_count.get(&hash_a).is_none()); // And now it should accept the dependent message. - assert_eq!(knowledge.send(&(CompactStatement::Valid(hash_a), 0)), Some(false)); + assert_eq!(knowledge.send(&(CompactStatement::Valid(hash_a), ValidatorIndex(0))), Some(false)); assert!(knowledge.known_candidates.contains(&hash_a)); assert_eq!(knowledge.sent_statements.len(), 3); assert!(knowledge.received_statements.is_empty()); @@ -1263,8 +1263,8 @@ mod tests { let mut knowledge = PeerRelayParentKnowledge::default(); let hash_a = CandidateHash([1; 32].into()); - assert!(knowledge.receive(&(CompactStatement::Candidate(hash_a), 0), 3).unwrap()); - assert!(knowledge.send(&(CompactStatement::Candidate(hash_a), 0)).is_none()); + assert!(knowledge.receive(&(CompactStatement::Candidate(hash_a), ValidatorIndex(0)), 3).unwrap()); + assert!(knowledge.send(&(CompactStatement::Candidate(hash_a), ValidatorIndex(0))).is_none()); } #[test] @@ -1274,18 +1274,18 @@ mod tests { let hash_a = CandidateHash([1; 32].into()); assert_eq!( - knowledge.receive(&(CompactStatement::Valid(hash_a), 0), 3), + knowledge.receive(&(CompactStatement::Valid(hash_a), ValidatorIndex(0)), 3), Err(COST_UNEXPECTED_STATEMENT), ); assert_eq!( - knowledge.receive(&(CompactStatement::Candidate(hash_a), 0), 3), + knowledge.receive(&(CompactStatement::Candidate(hash_a), ValidatorIndex(0)), 3), Ok(true), ); // Push statements up to the flood limit. 
assert_eq!( - knowledge.receive(&(CompactStatement::Valid(hash_a), 1), 3), + knowledge.receive(&(CompactStatement::Valid(hash_a), ValidatorIndex(1)), 3), Ok(false), ); @@ -1293,14 +1293,14 @@ mod tests { assert_eq!(*knowledge.received_message_count.get(&hash_a).unwrap(), 2); assert_eq!( - knowledge.receive(&(CompactStatement::Valid(hash_a), 2), 3), + knowledge.receive(&(CompactStatement::Valid(hash_a), ValidatorIndex(2)), 3), Ok(false), ); assert_eq!(*knowledge.received_message_count.get(&hash_a).unwrap(), 3); assert_eq!( - knowledge.receive(&(CompactStatement::Valid(hash_a), 7), 3), + knowledge.receive(&(CompactStatement::Valid(hash_a), ValidatorIndex(7)), 3), Err(COST_APPARENT_FLOOD), ); @@ -1312,23 +1312,23 @@ mod tests { let hash_c = CandidateHash([3; 32].into()); assert_eq!( - knowledge.receive(&(CompactStatement::Candidate(hash_b), 0), 3), + knowledge.receive(&(CompactStatement::Candidate(hash_b), ValidatorIndex(0)), 3), Ok(true), ); assert_eq!( - knowledge.receive(&(CompactStatement::Candidate(hash_c), 0), 3), + knowledge.receive(&(CompactStatement::Candidate(hash_c), ValidatorIndex(0)), 3), Err(COST_UNEXPECTED_STATEMENT), ); // Last, make sure that already-known statements are disregarded. assert_eq!( - knowledge.receive(&(CompactStatement::Valid(hash_a), 2), 3), + knowledge.receive(&(CompactStatement::Valid(hash_a), ValidatorIndex(2)), 3), Err(COST_DUPLICATE_STATEMENT), ); assert_eq!( - knowledge.receive(&(CompactStatement::Candidate(hash_b), 0), 3), + knowledge.receive(&(CompactStatement::Candidate(hash_b), ValidatorIndex(0)), 3), Err(COST_DUPLICATE_STATEMENT), ); } @@ -1386,7 +1386,7 @@ mod tests { &keystore, Statement::Seconded(candidate.clone()), &signing_context, - 0, + ValidatorIndex(0), &alice_public.into(), )).expect("should be signed")); @@ -1396,7 +1396,7 @@ mod tests { &keystore, Statement::Valid(candidate_hash), &signing_context, - 1, + ValidatorIndex(1), &bob_public.into(), )).expect("should be signed")); @@ -1406,7 +1406,7 @@ mod tests { &keystore, Statement::Valid(candidate_hash), &signing_context, - 2, + ValidatorIndex(2), &charlie_public.into(), )).expect("should be signed")); @@ -1451,13 +1451,13 @@ mod tests { assert!(c_knowledge.known_candidates.contains(&candidate_hash)); assert!(c_knowledge.sent_statements.contains( - &(CompactStatement::Candidate(candidate_hash), 0) + &(CompactStatement::Candidate(candidate_hash), ValidatorIndex(0)) )); assert!(c_knowledge.sent_statements.contains( - &(CompactStatement::Valid(candidate_hash), 1) + &(CompactStatement::Valid(candidate_hash), ValidatorIndex(1)) )); assert!(c_knowledge.sent_statements.contains( - &(CompactStatement::Valid(candidate_hash), 2) + &(CompactStatement::Valid(candidate_hash), ValidatorIndex(2)) )); // now see if we got the 3 messages from the active head data. 
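The peer-knowledge tests above pin down the accounting rules behind these `(CompactStatement, ValidatorIndex)` fingerprints: statements about a candidate are only accepted once the peer has announced that candidate, duplicates are penalised, and only a bounded number of messages per candidate is accepted before it counts as an apparent flood. A schematic stand-in for the counting part of that logic, with invented names and a `u64` in place of the candidate hash (the real `PeerRelayParentKnowledge` tracks more, such as sent statements and duplicate detection):

```rust
use std::collections::HashMap;

#[derive(Debug, PartialEq)]
enum RejectReason {
    UnexpectedStatement, // candidate never announced by this peer
    ApparentFlood,       // per-candidate message limit exceeded
}

#[derive(Default)]
struct PeerKnowledge {
    // candidate hash stand-in -> number of messages received about it
    received_message_count: HashMap<u64, usize>,
}

impl PeerKnowledge {
    // Accept the candidate announcement itself; it counts as one received
    // message for that candidate.
    fn note_candidate(&mut self, candidate: u64) {
        self.received_message_count.entry(candidate).or_insert(1);
    }

    fn receive(&mut self, candidate: u64, limit: usize) -> Result<(), RejectReason> {
        match self.received_message_count.get_mut(&candidate) {
            None => Err(RejectReason::UnexpectedStatement),
            Some(count) if *count >= limit => Err(RejectReason::ApparentFlood),
            Some(count) => {
                *count += 1;
                Ok(())
            }
        }
    }
}

fn main() {
    let mut knowledge = PeerKnowledge::default();
    assert_eq!(knowledge.receive(1, 3), Err(RejectReason::UnexpectedStatement));
    knowledge.note_candidate(1);
    assert_eq!(knowledge.receive(1, 3), Ok(()));
    assert_eq!(knowledge.receive(1, 3), Ok(()));
    assert_eq!(knowledge.receive(1, 3), Err(RejectReason::ApparentFlood));
}
```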
@@ -1538,14 +1538,14 @@ mod tests { &keystore, Statement::Seconded(candidate), &signing_context, - 0, + ValidatorIndex(0), &alice_public.into(), ).await.expect("should be signed"); StoredStatement { comparator: StoredStatementComparator { compact: statement.payload().to_compact(), - validator_index: 0, + validator_index: ValidatorIndex(0), signature: statement.signature().clone() }, statement, @@ -1565,7 +1565,7 @@ mod tests { assert!(needs_dependents.contains(&peer_c)); } - let fingerprint = (statement.compact().clone(), 0); + let fingerprint = (statement.compact().clone(), ValidatorIndex(0)); assert!( peer_data.get(&peer_b).unwrap() @@ -1706,7 +1706,7 @@ mod tests { &keystore, Statement::Seconded(candidate), &signing_context, - 0, + ValidatorIndex(0), &alice_public.into(), ).await.expect("should be signed") }; diff --git a/runtime/parachains/src/inclusion.rs b/runtime/parachains/src/inclusion.rs index 7deaf32e73a7..1ed91b386817 100644 --- a/runtime/parachains/src/inclusion.rs +++ b/runtime/parachains/src/inclusion.rs @@ -991,7 +991,7 @@ mod tests { let candidate_hash = candidate.hash(); for (idx_in_group, val_idx) in group.iter().enumerate().take(signing) { - let key: Sr25519Keyring = validators[*val_idx as usize]; + let key: Sr25519Keyring = validators[val_idx.0 as usize]; *validator_indices.get_mut(idx_in_group).unwrap() = true; let signature = SignedStatement::sign( @@ -1020,7 +1020,7 @@ mod tests { &backed, signing_context, group.len(), - |i| Some(validators[group[i] as usize].public().into()), + |i| Some(validators[group[i].0 as usize].public().into()), ).ok().unwrap_or(0) * 2 > group.len(); if should_pass { @@ -1238,7 +1238,7 @@ mod tests { let signed = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield, &signing_context, )); @@ -1255,7 +1255,7 @@ mod tests { let signed = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield, &signing_context, )); @@ -1272,7 +1272,7 @@ mod tests { let signed_0 = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield.clone(), &signing_context, )); @@ -1280,7 +1280,7 @@ mod tests { let signed_1 = block_on(sign_bitfield( &keystore, &validators[1], - 1, + ValidatorIndex(1), bare_bitfield, &signing_context, )); @@ -1298,7 +1298,7 @@ mod tests { let signed = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield, &signing_context, )); @@ -1315,7 +1315,7 @@ mod tests { let signed = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield, &signing_context, )); @@ -1349,7 +1349,7 @@ mod tests { let signed = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield, &signing_context, )); @@ -1385,7 +1385,7 @@ mod tests { let signed = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield, &signing_context, )); @@ -1509,7 +1509,7 @@ mod tests { Some(block_on(sign_bitfield( &keystore, key, - i as ValidatorIndex, + ValidatorIndex(i as _), to_sign, &signing_context, ))) @@ -1547,18 +1547,18 @@ mod tests { let rewards = crate::mock::availability_rewards(); assert_eq!(rewards.len(), 4); - assert_eq!(rewards.get(&0).unwrap(), &1); - assert_eq!(rewards.get(&1).unwrap(), &1); - assert_eq!(rewards.get(&2).unwrap(), &1); - assert_eq!(rewards.get(&3).unwrap(), &1); + assert_eq!(rewards.get(&ValidatorIndex(0)).unwrap(), &1); + assert_eq!(rewards.get(&ValidatorIndex(1)).unwrap(), &1); + 
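The reward assertions above look entries up by `&ValidatorIndex(i)`, i.e. the newtype is used directly as a map key, which is one reason it keeps `Eq`, `Ord` and (with `std`) `Hash` among its derives. A tiny stand-alone illustration with a local copy of the newtype:

```rust
use std::collections::HashMap;

// Local copy for a self-contained example; the real type lives in
// `primitives/src/v0.rs`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct ValidatorIndex(pub u32);

fn main() {
    let mut rewards: HashMap<ValidatorIndex, u32> = HashMap::new();
    for i in 0u32..4 {
        *rewards.entry(ValidatorIndex(i)).or_insert(0) += 1;
    }
    assert_eq!(rewards.get(&ValidatorIndex(2)), Some(&1));
    assert!(rewards.get(&ValidatorIndex(7)).is_none());
}
```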
assert_eq!(rewards.get(&ValidatorIndex(2)).unwrap(), &1); + assert_eq!(rewards.get(&ValidatorIndex(3)).unwrap(), &1); } { let rewards = crate::mock::backing_rewards(); assert_eq!(rewards.len(), 2); - assert_eq!(rewards.get(&3).unwrap(), &1); - assert_eq!(rewards.get(&4).unwrap(), &1); + assert_eq!(rewards.get(&ValidatorIndex(3)).unwrap(), &1); + assert_eq!(rewards.get(&ValidatorIndex(4)).unwrap(), &1); } }); } @@ -1602,7 +1602,7 @@ mod tests { group_index if group_index == GroupIndex::from(1) => Some(vec![2, 3]), group_index if group_index == GroupIndex::from(2) => Some(vec![4]), _ => panic!("Group index out of bounds for 2 parachains and 1 parathread core"), - }; + }.map(|m| m.map(ValidatorIndex)); let thread_collator: CollatorId = Sr25519Keyring::Two.public().into(); From ea5f6a4dec66735f4b7f2bc1fd35beb36e22c79d Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 21:43:29 +0100 Subject: [PATCH 32/60] Make all tests compile. --- .../approval-voting/src/approval_checking.rs | 68 ++++++------ node/core/approval-voting/src/criteria.rs | 15 ++- node/core/approval-voting/src/import.rs | 3 +- node/core/approval-voting/src/tests.rs | 104 +++++++++--------- node/core/av-store/src/tests.rs | 26 ++--- node/core/backing/src/lib.rs | 41 +++---- .../approval-distribution/src/tests.rs | 16 +-- .../availability-recovery/src/tests.rs | 12 +- node/network/bitfield-distribution/src/lib.rs | 14 +-- .../collator-protocol/src/collator_side.rs | 11 +- node/network/pov-distribution/src/tests.rs | 6 +- runtime/parachains/src/inclusion.rs | 24 ++-- 12 files changed, 172 insertions(+), 168 deletions(-) diff --git a/node/core/approval-voting/src/approval_checking.rs b/node/core/approval-voting/src/approval_checking.rs index 90dbd6728cec..ec623a25bf99 100644 --- a/node/core/approval-voting/src/approval_checking.rs +++ b/node/core/approval-voting/src/approval_checking.rs @@ -348,7 +348,7 @@ pub fn tranches_to_approve( mod tests { use super::*; - use polkadot_primitives::v1::GroupIndex; + use polkadot_primitives::v1::{GroupIndex, ValidatorIndex}; use bitvec::bitvec; use bitvec::order::Lsb0 as BitOrderLsb0; @@ -393,7 +393,7 @@ mod tests { }.into(); for i in 0..6 { - candidate.mark_approval(i); + candidate.mark_approval(ValidatorIndex(i)); } let approval_entry = approval_db::v1::ApprovalEntry { @@ -406,7 +406,7 @@ mod tests { assert!(!check_approval(&candidate, &approval_entry, RequiredTranches::All)); - candidate.mark_approval(6); + candidate.mark_approval(ValidatorIndex(6)); assert!(check_approval(&candidate, &approval_entry, RequiredTranches::All)); } @@ -420,22 +420,22 @@ mod tests { }.into(); for i in 0..6 { - candidate.mark_approval(i); + candidate.mark_approval(ValidatorIndex(i)); } let approval_entry = approval_db::v1::ApprovalEntry { tranches: vec![ approval_db::v1::TrancheEntry { tranche: 0, - assignments: (0..4).map(|i| (i, 0.into())).collect(), + assignments: (0..4).map(|i| (ValidatorIndex(i), 0.into())).collect(), }, approval_db::v1::TrancheEntry { tranche: 1, - assignments: (4..6).map(|i| (i, 1.into())).collect(), + assignments: (4..6).map(|i| (ValidatorIndex(i), 1.into())).collect(), }, approval_db::v1::TrancheEntry { tranche: 2, - assignments: (6..10).map(|i| (i, 0.into())).collect(), + assignments: (6..10).map(|i| (ValidatorIndex(i), 0.into())).collect(), }, ], assignments: bitvec![BitOrderLsb0, u8; 1; 10], @@ -487,13 +487,13 @@ mod tests { approved: false, }.into(); - approval_entry.import_assignment(0, 0, block_tick); - approval_entry.import_assignment(0, 1, block_tick); + 
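The approval-checking tests above drive the entry through `import_assignment(tranche, validator_index, tick)` and then check per-validator assignment and approval bits. A rough, deliberately simplified sketch of that shape of bookkeeping — a `BTreeMap` stands in for the ordered tranche list, and the real `ApprovalEntry` also carries the backing group, its own assignment certificate and the approval state:

```rust
use std::collections::BTreeMap;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ValidatorIndex(pub u32);

type Tick = u64;
type DelayTranche = u32;

// Simplified: assignments grouped per delay tranche, remembering the tick at
// which each one was imported, plus one "assigned" bit per validator.
#[derive(Default)]
struct ApprovalEntry {
    tranches: BTreeMap<DelayTranche, Vec<(ValidatorIndex, Tick)>>,
    assignments: Vec<bool>,
}

impl ApprovalEntry {
    fn import_assignment(&mut self, tranche: DelayTranche, validator: ValidatorIndex, tick: Tick) {
        self.tranches.entry(tranche).or_default().push((validator, tick));
        if let Some(bit) = self.assignments.get_mut(validator.0 as usize) {
            *bit = true;
        }
    }

    fn is_assigned(&self, validator: ValidatorIndex) -> bool {
        self.assignments.get(validator.0 as usize).copied().unwrap_or(false)
    }
}

fn main() {
    let mut entry = ApprovalEntry::default();
    entry.assignments = vec![false; 5];
    entry.import_assignment(0, ValidatorIndex(0), 100);
    entry.import_assignment(1, ValidatorIndex(3), 101);
    assert!(entry.is_assigned(ValidatorIndex(0)));
    assert!(entry.is_assigned(ValidatorIndex(3)));
    assert!(!entry.is_assigned(ValidatorIndex(4)));
}
```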
approval_entry.import_assignment(0,ValidatorIndex(0), block_tick); + approval_entry.import_assignment(0,ValidatorIndex(1), block_tick); - approval_entry.import_assignment(1, 2, block_tick + 1); - approval_entry.import_assignment(1, 3, block_tick + 1); + approval_entry.import_assignment(1,ValidatorIndex(2), block_tick + 1); + approval_entry.import_assignment(1,ValidatorIndex(3), block_tick + 1); - approval_entry.import_assignment(2, 4, block_tick + 2); + approval_entry.import_assignment(2,ValidatorIndex(4), block_tick + 2); let approvals = bitvec![BitOrderLsb0, u8; 1; 5]; @@ -524,8 +524,8 @@ mod tests { approved: false, }.into(); - approval_entry.import_assignment(0, 0, block_tick); - approval_entry.import_assignment(1, 2, block_tick); + approval_entry.import_assignment(0, ValidatorIndex(0), block_tick); + approval_entry.import_assignment(1, ValidatorIndex(2), block_tick); let approvals = bitvec![BitOrderLsb0, u8; 0; 10]; @@ -562,10 +562,10 @@ mod tests { approved: false, }.into(); - approval_entry.import_assignment(0, 0, block_tick); - approval_entry.import_assignment(0, 1, block_tick); + approval_entry.import_assignment(0, ValidatorIndex(0), block_tick); + approval_entry.import_assignment(0, ValidatorIndex(1), block_tick); - approval_entry.import_assignment(1, 2, block_tick); + approval_entry.import_assignment(1, ValidatorIndex(2), block_tick); let mut approvals = bitvec![BitOrderLsb0, u8; 0; 10]; approvals.set(0, true); @@ -605,11 +605,11 @@ mod tests { approved: false, }.into(); - approval_entry.import_assignment(0, 0, block_tick); - approval_entry.import_assignment(0, 1, block_tick); + approval_entry.import_assignment(0, ValidatorIndex(0), block_tick); + approval_entry.import_assignment(0, ValidatorIndex(1), block_tick); - approval_entry.import_assignment(1, 2, block_tick); - approval_entry.import_assignment(1, 3, block_tick); + approval_entry.import_assignment(1, ValidatorIndex(2), block_tick); + approval_entry.import_assignment(1, ValidatorIndex(3), block_tick); let mut approvals = bitvec![BitOrderLsb0, u8; 0; n_validators]; approvals.set(0, true); @@ -670,14 +670,14 @@ mod tests { approved: false, }.into(); - approval_entry.import_assignment(0, 0, block_tick); - approval_entry.import_assignment(0, 1, block_tick); + approval_entry.import_assignment(0, ValidatorIndex(0), block_tick); + approval_entry.import_assignment(0, ValidatorIndex(1), block_tick); - approval_entry.import_assignment(1, 2, block_tick + 1); - approval_entry.import_assignment(1, 3, block_tick + 1); + approval_entry.import_assignment(1, ValidatorIndex(2), block_tick + 1); + approval_entry.import_assignment(1, ValidatorIndex(3), block_tick + 1); - approval_entry.import_assignment(2, 4, block_tick + no_show_duration + 2); - approval_entry.import_assignment(2, 5, block_tick + no_show_duration + 2); + approval_entry.import_assignment(2, ValidatorIndex(4), block_tick + no_show_duration + 2); + approval_entry.import_assignment(2, ValidatorIndex(5), block_tick + no_show_duration + 2); let mut approvals = bitvec![BitOrderLsb0, u8; 0; n_validators]; approvals.set(0, true); @@ -757,14 +757,14 @@ mod tests { approved: false, }.into(); - approval_entry.import_assignment(0, 0, block_tick); - approval_entry.import_assignment(0, 1, block_tick); + approval_entry.import_assignment(0, ValidatorIndex(0), block_tick); + approval_entry.import_assignment(0, ValidatorIndex(1), block_tick); - approval_entry.import_assignment(1, 2, block_tick + 1); - approval_entry.import_assignment(1, 3, block_tick + 1); + approval_entry.import_assignment(1, 
ValidatorIndex(2), block_tick + 1); + approval_entry.import_assignment(1, ValidatorIndex(3), block_tick + 1); - approval_entry.import_assignment(2, 4, block_tick + no_show_duration + 2); - approval_entry.import_assignment(2, 5, block_tick + no_show_duration + 2); + approval_entry.import_assignment(2, ValidatorIndex(4), block_tick + no_show_duration + 2); + approval_entry.import_assignment(2, ValidatorIndex(5), block_tick + no_show_duration + 2); let mut approvals = bitvec![BitOrderLsb0, u8; 0; n_validators]; approvals.set(0, true); @@ -813,7 +813,7 @@ mod tests { }, ); - approval_entry.import_assignment(3, 6, block_tick); + approval_entry.import_assignment(3, ValidatorIndex(6), block_tick); approvals.set(6, true); let tranche_now = no_show_duration as DelayTranche + 3; diff --git a/node/core/approval-voting/src/criteria.rs b/node/core/approval-voting/src/criteria.rs index 5880a8463c31..6a7544afe9d5 100644 --- a/node/core/approval-voting/src/criteria.rs +++ b/node/core/approval-voting/src/criteria.rs @@ -248,15 +248,14 @@ pub(crate) fn compute_assignments( ) -> HashMap { let (index, assignments_key): (ValidatorIndex, AssignmentPair) = { let key = config.assignment_keys.iter().enumerate() - .filter_map(|(i, p)| match keystore.key_pair(p) { + .find_map(|(i, p)| match keystore.key_pair(p) { Ok(pair) => Some((ValidatorIndex(i as _), pair)), Err(sc_keystore::Error::PairNotFound(_)) => None, Err(e) => { tracing::warn!(target: LOG_TARGET, "Encountered keystore error: {:?}", e); None } - }) - .next(); + }); match key { None => return Default::default(), @@ -535,7 +534,7 @@ mod tests { (0..n_groups).map(|i| { (i * size .. (i + 1) *size) .chain(if i < big_groups { Some(scraps + i) } else { None }) - .map(|j| j as ValidatorIndex) + .map(|j| ValidatorIndex(j as _)) .collect::>() }).collect() } @@ -565,7 +564,7 @@ mod tests { Sr25519Keyring::Bob, Sr25519Keyring::Charlie, ]), - validator_groups: vec![vec![0], vec![1, 2]], + validator_groups: vec![vec![ValidatorIndex(0)], vec![ValidatorIndex(1), ValidatorIndex(2)]], n_cores: 2, zeroth_delay_tranche_width: 10, relay_vrf_modulo_samples: 3, @@ -596,7 +595,7 @@ mod tests { Sr25519Keyring::Bob, Sr25519Keyring::Charlie, ]), - validator_groups: vec![vec![0], vec![1, 2]], + validator_groups: vec![vec![ValidatorIndex(0)], vec![ValidatorIndex(1), ValidatorIndex(2)]], n_cores: 2, zeroth_delay_tranche_width: 10, relay_vrf_modulo_samples: 3, @@ -660,7 +659,7 @@ mod tests { group: group_for_core(core.0 as _), cert: assignment.cert, own_group: GroupIndex(0), - val_index: 0, + val_index: ValidatorIndex(0), config: config.clone(), }; @@ -710,7 +709,7 @@ mod tests { #[test] fn check_rejects_nonexistent_key() { check_mutated_assignments(200, 100, 25, |m| { - m.val_index += 200; + m.val_index.0 += 200; Some(false) }); } diff --git a/node/core/approval-voting/src/import.rs b/node/core/approval-voting/src/import.rs index 224d9ca310ef..541c8ff4273d 100644 --- a/node/core/approval-voting/src/import.rs +++ b/node/core/approval-voting/src/import.rs @@ -692,6 +692,7 @@ mod tests { use super::*; use polkadot_node_subsystem_test_helpers::make_subsystem_context; use polkadot_node_primitives::approval::{VRFOutput, VRFProof}; + use polkadot_primitives::v1::ValidatorIndex; use polkadot_subsystem::messages::AllMessages; use sp_core::testing::TaskExecutor; use sp_runtime::{Digest, DigestItem}; @@ -1546,7 +1547,7 @@ mod tests { validators: vec![Sr25519Keyring::Alice.public().into(); 6], discovery_keys: Vec::new(), assignment_keys: Vec::new(), - validator_groups: vec![vec![0; 5], vec![0; 
2]], + validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(5)], vec![ValidatorIndex(0), ValidatorIndex(2)]], n_cores: 6, needed_approvals: 2, zeroth_delay_tranche_width: irrelevant, diff --git a/node/core/approval-voting/src/tests.rs b/node/core/approval-voting/src/tests.rs index 7c8c9f3d94fe..457bbeb6b6a7 100644 --- a/node/core/approval-voting/src/tests.rs +++ b/node/core/approval-voting/src/tests.rs @@ -243,7 +243,7 @@ impl Default for StateConfig { slot: Slot::from(0), tick: 0, validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob], - validator_groups: vec![vec![0], vec![1]], + validator_groups: vec![vec![ValidatorIndex(0)], vec![ValidatorIndex(1)]], needed_approvals: 1, no_show_slots: 2, } @@ -364,7 +364,7 @@ fn rejects_bad_assignment() { let block_hash = Hash::repeat_byte(0x01); let assignment_good = IndirectAssignmentCert { block_hash, - validator: 0, + validator: ValidatorIndex(0), cert: garbage_assignment_cert( AssignmentCertKind::RelayVRFModulo { sample: 0, @@ -386,7 +386,7 @@ fn rejects_bad_assignment() { // unknown hash let assignment = IndirectAssignmentCert { block_hash: Hash::repeat_byte(0x02), - validator: 0, + validator: ValidatorIndex(0), cert: garbage_assignment_cert( AssignmentCertKind::RelayVRFModulo { sample: 0, @@ -423,7 +423,7 @@ fn rejects_assignment_in_future() { let candidate_index = 0; let assignment = IndirectAssignmentCert { block_hash, - validator: 0, + validator: ValidatorIndex(0), cert: garbage_assignment_cert( AssignmentCertKind::RelayVRFModulo { sample: 0, @@ -467,7 +467,7 @@ fn rejects_assignment_with_unknown_candidate() { let candidate_index = 1; let assignment = IndirectAssignmentCert { block_hash, - validator: 0, + validator: ValidatorIndex(0), cert: garbage_assignment_cert( AssignmentCertKind::RelayVRFModulo { sample: 0, @@ -493,7 +493,7 @@ fn assignment_import_updates_candidate_entry_and_schedules_wakeup() { let candidate_index = 0; let assignment = IndirectAssignmentCert { block_hash, - validator: 0, + validator: ValidatorIndex(0), cert: garbage_assignment_cert( AssignmentCertKind::RelayVRFModulo { sample: 0, @@ -534,7 +534,7 @@ fn assignment_import_updates_candidate_entry_and_schedules_wakeup() { actions.get(1).unwrap(), Action::WriteCandidateEntry(c, e) => { assert_eq!(c, &candidate_hash); - assert!(e.approval_entry(&block_hash).unwrap().is_assigned(0)); + assert!(e.approval_entry(&block_hash).unwrap().is_assigned(ValidatorIndex(0))); } ); } @@ -554,7 +554,7 @@ fn rejects_approval_before_assignment() { let vote = IndirectSignedApprovalVote { block_hash, candidate_index: 0, - validator: 0, + validator: ValidatorIndex(0), signature: sign_approval(Sr25519Keyring::Alice, candidate_hash, 1), }; @@ -583,7 +583,7 @@ fn rejects_approval_if_no_candidate_entry() { let vote = IndirectSignedApprovalVote { block_hash, candidate_index: 0, - validator: 0, + validator: ValidatorIndex(0), signature: sign_approval(Sr25519Keyring::Alice, candidate_hash, 1), }; @@ -603,7 +603,7 @@ fn rejects_approval_if_no_candidate_entry() { fn rejects_approval_if_no_block_entry() { let block_hash = Hash::repeat_byte(0x01); let candidate_hash = CandidateHash(Hash::repeat_byte(0xCC)); - let validator_index = 0; + let validator_index = ValidatorIndex(0); let mut state = State { assignment_criteria: Box::new(MockAssignmentCriteria::check_only(|| { @@ -615,7 +615,7 @@ fn rejects_approval_if_no_block_entry() { let vote = IndirectSignedApprovalVote { block_hash, candidate_index: 0, - validator: 0, + validator: ValidatorIndex(0), signature: 
sign_approval(Sr25519Keyring::Alice, candidate_hash, 1), }; @@ -640,7 +640,7 @@ fn rejects_approval_if_no_block_entry() { fn accepts_and_imports_approval_after_assignment() { let block_hash = Hash::repeat_byte(0x01); let candidate_hash = CandidateHash(Hash::repeat_byte(0xCC)); - let validator_index = 0; + let validator_index = ValidatorIndex(0); let candidate_index = 0; let mut state = State { @@ -649,7 +649,7 @@ fn accepts_and_imports_approval_after_assignment() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob, Sr25519Keyring::Charlie], - validator_groups: vec![vec![0, 1], vec![2]], + validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(1)], vec![ValidatorIndex(2)]], needed_approvals: 2, ..Default::default() }) @@ -680,7 +680,7 @@ fn accepts_and_imports_approval_after_assignment() { actions.get(0).unwrap(), Action::WriteCandidateEntry(c_hash, c_entry) => { assert_eq!(c_hash, &candidate_hash); - assert!(c_entry.approvals().get(validator_index as usize).unwrap()); + assert!(c_entry.approvals().get(validator_index.0 as usize).unwrap()); assert!(!c_entry.approval_entry(&block_hash).unwrap().is_approved()); } ); @@ -690,7 +690,7 @@ fn accepts_and_imports_approval_after_assignment() { fn second_approval_import_is_no_op() { let block_hash = Hash::repeat_byte(0x01); let candidate_hash = CandidateHash(Hash::repeat_byte(0xCC)); - let validator_index = 0; + let validator_index = ValidatorIndex(0); let candidate_index = 0; let mut state = State { @@ -699,7 +699,7 @@ fn second_approval_import_is_no_op() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob, Sr25519Keyring::Charlie], - validator_groups: vec![vec![0, 1], vec![2]], + validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(1)], vec![ValidatorIndex(2)]], needed_approvals: 2, ..Default::default() }) @@ -734,8 +734,8 @@ fn second_approval_import_is_no_op() { fn check_and_apply_full_approval_sets_flag_and_bit() { let block_hash = Hash::repeat_byte(0x01); let candidate_hash = CandidateHash(Hash::repeat_byte(0xCC)); - let validator_index_a = 0; - let validator_index_b = 1; + let validator_index_a = ValidatorIndex(0); + let validator_index_b = ValidatorIndex(1); let mut state = State { assignment_criteria: Box::new(MockAssignmentCriteria::check_only(|| { @@ -743,7 +743,7 @@ fn check_and_apply_full_approval_sets_flag_and_bit() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob, Sr25519Keyring::Charlie], - validator_groups: vec![vec![0, 1], vec![2]], + validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(1)], vec![ValidatorIndex(2)]], needed_approvals: 2, ..Default::default() }) @@ -795,8 +795,8 @@ fn check_and_apply_full_approval_sets_flag_and_bit() { fn check_and_apply_full_approval_does_not_load_cached_block_from_db() { let block_hash = Hash::repeat_byte(0x01); let candidate_hash = CandidateHash(Hash::repeat_byte(0xCC)); - let validator_index_a = 0; - let validator_index_b = 1; + let validator_index_a = ValidatorIndex(0); + let validator_index_b = ValidatorIndex(1); let mut state = State { assignment_criteria: Box::new(MockAssignmentCriteria::check_only(|| { @@ -804,7 +804,7 @@ fn check_and_apply_full_approval_does_not_load_cached_block_from_db() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob, Sr25519Keyring::Charlie], - validator_groups: vec![vec![0, 1], vec![2]], + validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(1)], 
vec![ValidatorIndex(2)]], needed_approvals: 2, ..Default::default() }) @@ -867,7 +867,7 @@ fn assignment_triggered_by_all_with_less_than_supermajority() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: 1, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: false, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -886,15 +886,15 @@ fn assignment_triggered_by_all_with_less_than_supermajority() { candidate_entry .approval_entry_mut(&block_hash) .unwrap() - .import_assignment(0, 0, 0); + .import_assignment(0, ValidatorIndex(0), 0); candidate_entry .approval_entry_mut(&block_hash) .unwrap() - .import_assignment(0, 1, 0); + .import_assignment(0, ValidatorIndex(1), 0); - candidate_entry.mark_approval(0); - candidate_entry.mark_approval(1); + candidate_entry.mark_approval(ValidatorIndex(0)); + candidate_entry.mark_approval(ValidatorIndex(1)); let tranche_now = 1; assert!(should_trigger_assignment( @@ -918,7 +918,7 @@ fn assignment_not_triggered_by_all_with_supermajority() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: 1, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: false, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -937,21 +937,21 @@ fn assignment_not_triggered_by_all_with_supermajority() { candidate_entry .approval_entry_mut(&block_hash) .unwrap() - .import_assignment(0, 0, 0); + .import_assignment(0, ValidatorIndex(0), 0); candidate_entry .approval_entry_mut(&block_hash) .unwrap() - .import_assignment(0, 1, 0); + .import_assignment(0, ValidatorIndex(1), 0); candidate_entry .approval_entry_mut(&block_hash) .unwrap() - .import_assignment(0, 2, 0); + .import_assignment(0, ValidatorIndex(2), 0); - candidate_entry.mark_approval(0); - candidate_entry.mark_approval(1); - candidate_entry.mark_approval(2); + candidate_entry.mark_approval(ValidatorIndex(0)); + candidate_entry.mark_approval(ValidatorIndex(1)); + candidate_entry.mark_approval(ValidatorIndex(2)); let tranche_now = 1; assert!(!should_trigger_assignment( @@ -975,7 +975,7 @@ fn assignment_not_triggered_if_already_triggered() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: 1, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: true, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -1012,7 +1012,7 @@ fn assignment_not_triggered_by_exact() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: 1, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: false, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -1050,7 +1050,7 @@ fn assignment_not_triggered_more_than_maximum() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: maximum_broadcast + 1, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: false, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -1093,7 +1093,7 @@ fn assignment_triggered_if_at_maximum() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: maximum_broadcast, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: false, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -1136,7 +1136,7 @@ fn assignment_not_triggered_if_at_maximum_but_clock_is_before() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: maximum_broadcast, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: false, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -1179,7 +1179,7 @@ fn assignment_not_triggered_if_at_maximum_but_clock_is_before_with_drift() { 
AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: maximum_broadcast, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: false, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -1259,8 +1259,8 @@ fn block_not_approved_until_all_candidates_approved() { let candidate_hash = CandidateHash(Hash::repeat_byte(0xCC)); let candidate_hash_2 = CandidateHash(Hash::repeat_byte(0xDD)); - let validator_index_a = 0; - let validator_index_b = 1; + let validator_index_a = ValidatorIndex(0); + let validator_index_b = ValidatorIndex(1); let mut state = State { assignment_criteria: Box::new(MockAssignmentCriteria::check_only(|| { @@ -1268,7 +1268,7 @@ fn block_not_approved_until_all_candidates_approved() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob, Sr25519Keyring::Charlie], - validator_groups: vec![vec![0, 1], vec![2]], + validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(1)], vec![ValidatorIndex(2)]], needed_approvals: 2, ..Default::default() }) @@ -1341,8 +1341,8 @@ fn candidate_approval_applied_to_all_blocks() { let block_hash = Hash::repeat_byte(0x01); let block_hash_2 = Hash::repeat_byte(0x02); let candidate_hash = CandidateHash(Hash::repeat_byte(0xCC)); - let validator_index_a = 0; - let validator_index_b = 1; + let validator_index_a = ValidatorIndex(0); + let validator_index_b = ValidatorIndex(1); let slot = Slot::from(1); let session_index = 1; @@ -1353,7 +1353,7 @@ fn candidate_approval_applied_to_all_blocks() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob, Sr25519Keyring::Charlie], - validator_groups: vec![vec![0, 1], vec![2]], + validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(1)], vec![ValidatorIndex(2)]], needed_approvals: 2, session_index, slot, @@ -1456,7 +1456,7 @@ fn approved_ancestor_all_approved() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob], - validator_groups: vec![vec![0], vec![1]], + validator_groups: vec![vec![ValidatorIndex(0)], vec![ValidatorIndex(1)]], needed_approvals: 2, session_index, slot, @@ -1538,7 +1538,7 @@ fn approved_ancestor_missing_approval() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob], - validator_groups: vec![vec![0], vec![1]], + validator_groups: vec![vec![ValidatorIndex(0)], vec![ValidatorIndex(1)]], needed_approvals: 2, session_index, slot, @@ -1615,7 +1615,7 @@ fn process_wakeup_trigger_assignment_launch_approval() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob], - validator_groups: vec![vec![0], vec![1]], + validator_groups: vec![vec![ValidatorIndex(0)], vec![ValidatorIndex(1)]], needed_approvals: 2, session_index, slot, @@ -1641,7 +1641,7 @@ fn process_wakeup_trigger_assignment_launch_approval() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: 0, - validator_index: 0, + validator_index: ValidatorIndex(0), triggered: false, }.into()); @@ -1700,7 +1700,7 @@ fn process_wakeup_schedules_wakeup() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob], - validator_groups: vec![vec![0], vec![1]], + validator_groups: vec![vec![ValidatorIndex(0)], vec![ValidatorIndex(1)]], needed_approvals: 2, session_index, slot, @@ -1718,7 +1718,7 @@ fn process_wakeup_schedules_wakeup() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: 10, - validator_index: 0, + validator_index: ValidatorIndex(0), triggered: false, 
}.into()); diff --git a/node/core/av-store/src/tests.rs b/node/core/av-store/src/tests.rs index 1d75e2b9beb9..c92e28ce3d8b 100644 --- a/node/core/av-store/src/tests.rs +++ b/node/core/av-store/src/tests.rs @@ -260,7 +260,7 @@ fn runtime_api_error_does_not_stop_the_subsystem() { // but that's fine, we're still alive let (tx, rx) = oneshot::channel(); let candidate_hash = CandidateHash(Hash::repeat_byte(33)); - let validator_index = 5; + let validator_index = ValidatorIndex(5); let query_chunk = AvailabilityStoreMessage::QueryChunk( candidate_hash, validator_index, @@ -281,7 +281,7 @@ fn store_chunk_works() { let TestHarness { mut virtual_overseer } = test_harness; let relay_parent = Hash::repeat_byte(32); let candidate_hash = CandidateHash(Hash::repeat_byte(33)); - let validator_index = 5; + let validator_index = ValidatorIndex(5); let n_validators = 10; let chunk = ErasureChunk { @@ -333,7 +333,7 @@ fn store_chunk_does_nothing_if_no_entry_already() { let TestHarness { mut virtual_overseer } = test_harness; let relay_parent = Hash::repeat_byte(32); let candidate_hash = CandidateHash(Hash::repeat_byte(33)); - let validator_index = 5; + let validator_index = ValidatorIndex(5); let chunk = ErasureChunk { chunk: vec![1, 2, 3], @@ -372,7 +372,7 @@ fn query_chunk_checks_meta() { test_harness(TestState::default(), store.clone(), |test_harness| async move { let TestHarness { mut virtual_overseer } = test_harness; let candidate_hash = CandidateHash(Hash::repeat_byte(33)); - let validator_index = 5; + let validator_index = ValidatorIndex(5); let n_validators = 10; // Ensure an entry already exists. In reality this would come from watching @@ -382,7 +382,7 @@ fn query_chunk_checks_meta() { data_available: false, chunks_stored: { let mut v = bitvec::bitvec![BitOrderLsb0, u8; 0; n_validators]; - v.set(validator_index as usize, true); + v.set(validator_index.0 as usize, true); v }, state: State::Unavailable(BETimestamp(0)), @@ -402,7 +402,7 @@ fn query_chunk_checks_meta() { let (tx, rx) = oneshot::channel(); let query_chunk = AvailabilityStoreMessage::QueryChunkAvailability( candidate_hash, - validator_index + 1, + ValidatorIndex(validator_index.0 + 1), tx, ); @@ -418,7 +418,7 @@ fn store_block_works() { test_harness(test_state.clone(), store.clone(), |test_harness| async move { let TestHarness { mut virtual_overseer } = test_harness; let candidate_hash = CandidateHash(Hash::repeat_byte(1)); - let validator_index = 5; + let validator_index = ValidatorIndex(5); let n_validators = 10; let pov = PoV { @@ -455,7 +455,7 @@ fn store_block_works() { let branch = branches.nth(5).unwrap(); let expected_chunk = ErasureChunk { chunk: branch.1.to_vec(), - index: 5, + index: ValidatorIndex(5), proof: branch.0, }; @@ -497,10 +497,10 @@ fn store_pov_and_query_chunk_works() { assert_eq!(rx.await.unwrap(), Ok(())); - for validator_index in 0..n_validators { - let chunk = query_chunk(&mut virtual_overseer, candidate_hash, validator_index).await.unwrap(); + for i in 0..n_validators { + let chunk = query_chunk(&mut virtual_overseer, candidate_hash, ValidatorIndex(i as _)).await.unwrap(); - assert_eq!(chunk.chunk, chunks_expected[validator_index as usize]); + assert_eq!(chunk.chunk, chunks_expected[i as usize]); } }); } @@ -842,7 +842,7 @@ async fn query_available_data( async fn query_chunk( virtual_overseer: &mut test_helpers::TestSubsystemContextHandle, candidate_hash: CandidateHash, - index: u32, + index: ValidatorIndex, ) -> Option { let (tx, rx) = oneshot::channel(); @@ -859,7 +859,7 @@ async fn query_all_chunks( 
expect_present: bool, ) -> bool { for i in 0..n_validators { - if query_chunk(virtual_overseer, candidate_hash, i).await.is_some() != expect_present { + if query_chunk(virtual_overseer, candidate_hash, ValidatorIndex(i)).await.is_some() != expect_present { return false } } diff --git a/node/core/backing/src/lib.rs b/node/core/backing/src/lib.rs index dc8279592894..e58a88c1aeed 100644 --- a/node/core/backing/src/lib.rs +++ b/node/core/backing/src/lib.rs @@ -1281,7 +1281,8 @@ mod tests { let validator_public = validator_pubkeys(&validators); - let validator_groups = vec![vec![2, 0, 3, 5], vec![1], vec![4]]; + let validator_groups = vec![vec![2, 0, 3, 5], vec![1], vec![4]] + .into_iter().map(|g| g.into_iter().map(ValidatorIndex).collect()).collect(); let group_rotation_info = GroupRotationInfo { session_start_block: 0, group_rotation_frequency: 100, @@ -1598,7 +1599,7 @@ mod tests { &test_state.keystore, Statement::Seconded(candidate_a.clone()), &test_state.signing_context, - 2, + ValidatorIndex(2), &public2.into(), ).await.expect("should be signed"); @@ -1606,7 +1607,7 @@ mod tests { &test_state.keystore, Statement::Valid(candidate_a_hash), &test_state.signing_context, - 5, + ValidatorIndex(5), &public1.into(), ).await.expect("should be signed"); @@ -1740,7 +1741,7 @@ mod tests { &test_state.keystore, Statement::Seconded(candidate_a.clone()), &test_state.signing_context, - 2, + ValidatorIndex(2), &public2.into(), ).await.expect("should be signed"); @@ -1748,7 +1749,7 @@ mod tests { &test_state.keystore, Statement::Valid(candidate_a_hash), &test_state.signing_context, - 5, + ValidatorIndex(5), &public1.into(), ).await.expect("should be signed"); @@ -1756,7 +1757,7 @@ mod tests { &test_state.keystore, Statement::Valid(candidate_a_hash), &test_state.signing_context, - 3, + ValidatorIndex(3), &public3.into(), ).await.expect("should be signed"); @@ -1893,7 +1894,7 @@ mod tests { &test_state.keystore, Statement::Seconded(candidate_a.clone()), &test_state.signing_context, - 2, + ValidatorIndex(2), &public2.into(), ).await.expect("should be signed"); @@ -1901,7 +1902,7 @@ mod tests { &test_state.keystore, Statement::Invalid(candidate_a_hash), &test_state.signing_context, - 2, + ValidatorIndex(2), &public2.into(), ).await.expect("should be signed"); @@ -1909,7 +1910,7 @@ mod tests { &test_state.keystore, Statement::Invalid(candidate_a_hash), &test_state.signing_context, - 0, + ValidatorIndex(0), &public0.into(), ).await.expect("should be signed"); @@ -2001,7 +2002,7 @@ mod tests { validator_index, s1, &test_state.signing_context, - &test_state.validator_public[validator_index as usize], + &test_state.validator_public[validator_index.0 as usize], ).expect("signature must be valid"); SignedFullStatement::new( @@ -2009,7 +2010,7 @@ mod tests { validator_index, s2, &test_state.signing_context, - &test_state.validator_public[validator_index as usize], + &test_state.validator_public[validator_index.0 as usize], ).expect("signature must be valid"); } ); @@ -2041,7 +2042,7 @@ mod tests { validator_index, s1, &test_state.signing_context, - &test_state.validator_public[validator_index as usize], + &test_state.validator_public[validator_index.0 as usize], ).expect("signature must be valid"); SignedFullStatement::new( @@ -2049,7 +2050,7 @@ mod tests { validator_index, s2, &test_state.signing_context, - &test_state.validator_public[validator_index as usize], + &test_state.validator_public[validator_index.0 as usize], ).expect("signature must be valid"); } ); @@ -2222,7 +2223,7 @@ mod tests { &test_state.keystore, 
Statement::Seconded(candidate.clone()), &test_state.signing_context, - 2, + ValidatorIndex(2), &validator2.into(), ).await.expect("should be signed"); @@ -2360,7 +2361,7 @@ mod tests { &test_state.keystore, Statement::Seconded(candidate.clone()), &test_state.signing_context, - 2, + ValidatorIndex(2), &public2.into(), ).await.expect("should be signed"); @@ -2502,7 +2503,7 @@ mod tests { &test_state.keystore, Statement::Seconded(candidate_a.clone()), &test_state.signing_context, - 2, + ValidatorIndex(2), &public2.into(), ).await.expect("should be signed"); @@ -2541,7 +2542,7 @@ mod tests { let validator_public = validator_pubkeys(&validators); let validator_groups = { let mut validator_groups = HashMap::new(); - validator_groups.insert(para_id, vec![0, 1, 2, 3, 4, 5]); + validator_groups.insert(para_id, vec![0, 1, 2, 3, 4, 5].into_iter().map(ValidatorIndex).collect()); validator_groups }; @@ -2566,9 +2567,9 @@ mod tests { let attested = TableAttestedCandidate { candidate: Default::default(), validity_votes: vec![ - (5, fake_attestation(5)), - (3, fake_attestation(3)), - (1, fake_attestation(1)), + (ValidatorIndex(5), fake_attestation(5)), + (ValidatorIndex(3), fake_attestation(3)), + (ValidatorIndex(1), fake_attestation(1)), ], group_id: para_id, }; diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index 5e0753749e2a..ed511e1e1113 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -208,7 +208,7 @@ fn try_import_the_same_assignment() { overseer_send(overseer, msg).await; // send the assignment related to `hash` - let validator_index = 0u32; + let validator_index = ValidatorIndex(0); let cert = fake_assignment_cert(hash, validator_index); let assignments = vec![(cert.clone(), 0u32)]; @@ -299,7 +299,7 @@ fn spam_attack_results_in_negative_reputation_change() { // to populate our knowledge let assignments: Vec<_> = (0..candidates_count) .map(|candidate_index| { - let validator_index = candidate_index as u32; + let validator_index = ValidatorIndex(candidate_index as u32); let cert = fake_assignment_cert(hash_b, validator_index); (cert, candidate_index as u32) }).collect(); @@ -372,7 +372,7 @@ fn import_approval_happy_path() { overseer_send(overseer, msg).await; // import an assignment related to `hash` locally - let validator_index = 0u32; + let validator_index = ValidatorIndex(0); let candidate_index = 0u32; let cert = fake_assignment_cert(hash, validator_index); overseer_send( @@ -455,7 +455,7 @@ fn import_approval_bad() { let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); overseer_send(overseer, msg).await; - let validator_index = 0u32; + let validator_index = ValidatorIndex(0); let candidate_index = 0u32; let cert = fake_assignment_cert(hash, validator_index); @@ -616,8 +616,8 @@ fn update_peer_view() { let msg = ApprovalDistributionMessage::NewBlocks(vec![meta_a, meta_b, meta_c]); overseer_send(overseer, msg).await; - let cert_a = fake_assignment_cert(hash_a, 0); - let cert_b = fake_assignment_cert(hash_b, 0); + let cert_a = fake_assignment_cert(hash_a, ValidatorIndex(0)); + let cert_b = fake_assignment_cert(hash_b, ValidatorIndex(0)); overseer_send( overseer, @@ -670,7 +670,7 @@ fn update_peer_view() { ) ).await; - let cert_c = fake_assignment_cert(hash_c, 0); + let cert_c = fake_assignment_cert(hash_c, ValidatorIndex(0)); overseer_send( overseer, @@ -753,7 +753,7 @@ fn import_remotely_then_locally() { overseer_send(overseer, msg).await; // import the 
assignment remotely first - let validator_index = 0u32; + let validator_index = ValidatorIndex(0); let candidate_index = 0u32; let cert = fake_assignment_cert(hash, validator_index); let assignments = vec![(cert.clone(), candidate_index)]; diff --git a/node/network/availability-recovery/src/tests.rs b/node/network/availability-recovery/src/tests.rs index 40b708d387f8..bbe63dc8a093 100644 --- a/node/network/availability-recovery/src/tests.rs +++ b/node/network/availability-recovery/src/tests.rs @@ -184,7 +184,7 @@ impl TestState { validators: self.validator_public.clone(), discovery_keys: self.validator_authority_id.clone(), // all validators in the same group. - validator_groups: vec![(0..self.validators.len()).map(|i| i as ValidatorIndex).collect()], + validator_groups: vec![(0..self.validators.len()).map(|i| ValidatorIndex(i as _)).collect()], ..Default::default() }))).unwrap(); } @@ -272,10 +272,10 @@ impl TestState { virtual_overseer, AvailabilityRecoveryMessage::NetworkBridgeUpdateV1( NetworkBridgeEvent::PeerMessage( - self.validator_peer_id[validator_index as usize].clone(), + self.validator_peer_id[validator_index.0 as usize].clone(), protocol_v1::AvailabilityRecoveryMessage::Chunk( request_id, - Some(self.chunks[validator_index as usize].clone()), + Some(self.chunks[validator_index.0 as usize].clone()), ) ) ) @@ -317,10 +317,10 @@ impl TestState { virtual_overseer, AvailabilityRecoveryMessage::NetworkBridgeUpdateV1( NetworkBridgeEvent::PeerMessage( - self.validator_peer_id[validator_index as usize].clone(), + self.validator_peer_id[validator_index.0 as usize].clone(), protocol_v1::AvailabilityRecoveryMessage::Chunk( request_id, - Some(self.chunks[validator_index as usize].clone()), + Some(self.chunks[validator_index.0 as usize].clone()), ) ) ) @@ -457,7 +457,7 @@ fn derive_erasure_chunks_with_proofs_and_root( .enumerate() .map(|(index, (proof, chunk))| ErasureChunk { chunk: chunk.to_vec(), - index: index as _, + index: ValidatorIndex(index as _), proof, }) .collect::>(); diff --git a/node/network/bitfield-distribution/src/lib.rs b/node/network/bitfield-distribution/src/lib.rs index e1f4df4a41d8..fac59333664d 100644 --- a/node/network/bitfield-distribution/src/lib.rs +++ b/node/network/bitfield-distribution/src/lib.rs @@ -767,7 +767,7 @@ mod test { use bitvec::bitvec; use futures::executor; use maplit::hashmap; - use polkadot_primitives::v1::{Signed, AvailabilityBitfield}; + use polkadot_primitives::v1::{Signed, AvailabilityBitfield, ValidatorIndex}; use polkadot_node_subsystem_test_helpers::make_subsystem_context; use polkadot_node_subsystem_util::TimeoutExt; use sp_keystore::{SyncCryptoStorePtr, SyncCryptoStore}; @@ -882,7 +882,7 @@ mod test { &keystore, payload, &signing_context, - 0, + ValidatorIndex(0), &malicious.into(), )).expect("should be signed"); @@ -947,7 +947,7 @@ mod test { &keystore, payload, &signing_context, - 42, + ValidatorIndex(42), &validator, )).expect("should be signed"); @@ -1004,7 +1004,7 @@ mod test { &keystore, payload, &signing_context, - 0, + ValidatorIndex(0), &validator, )).expect("should be signed"); @@ -1119,7 +1119,7 @@ mod test { &keystore, payload, &signing_context, - 0, + ValidatorIndex(0), &validator, )).expect("should be signed"); @@ -1215,7 +1215,7 @@ mod test { &keystore, payload, &signing_context, - 0, + ValidatorIndex(0), &validator, )).expect("should be signed"); @@ -1374,7 +1374,7 @@ mod test { &keystore, payload, &signing_context, - 0, + ValidatorIndex(0), &validator, )).expect("should be signed"); diff --git 
a/node/network/collator-protocol/src/collator_side.rs b/node/network/collator-protocol/src/collator_side.rs index eccab4be21a2..d31365f2784a 100644 --- a/node/network/collator-protocol/src/collator_side.rs +++ b/node/network/collator-protocol/src/collator_side.rs @@ -937,7 +937,8 @@ mod tests { .take(validator_public.len()) .collect(); - let validator_groups = vec![vec![2, 0, 4], vec![3, 2, 4]]; + let validator_groups = vec![vec![2, 0, 4], vec![3, 2, 4]] + .into_iter().map(|g| g.into_iter().map(ValidatorIndex).collect()).collect(); let group_rotation_info = GroupRotationInfo { session_start_block: 0, group_rotation_frequency: 100, @@ -979,20 +980,20 @@ mod tests { } fn current_group_validator_peer_ids(&self) -> Vec { - self.current_group_validator_indices().iter().map(|i| self.validator_peer_id[*i as usize].clone()).collect() + self.current_group_validator_indices().iter().map(|i| self.validator_peer_id[i.0 as usize].clone()).collect() } fn current_group_validator_authority_ids(&self) -> Vec { self.current_group_validator_indices() .iter() - .map(|i| self.validator_authority_id[*i as usize].clone()) + .map(|i| self.validator_authority_id[i.0 as usize].clone()) .collect() } fn current_group_validator_ids(&self) -> Vec { self.current_group_validator_indices() .iter() - .map(|i| self.validator_public[*i as usize].clone()) + .map(|i| self.validator_public[i.0 as usize].clone()) .collect() } @@ -1003,7 +1004,7 @@ mod tests { fn next_group_validator_authority_ids(&self) -> Vec { self.next_group_validator_indices() .iter() - .map(|i| self.validator_authority_id[*i as usize].clone()) + .map(|i| self.validator_authority_id[i.0 as usize].clone()) .collect() } diff --git a/node/network/pov-distribution/src/tests.rs b/node/network/pov-distribution/src/tests.rs index 2dfc0ce11f96..d8ceab55374c 100644 --- a/node/network/pov-distribution/src/tests.rs +++ b/node/network/pov-distribution/src/tests.rs @@ -174,7 +174,8 @@ impl Default for TestState { .take(validator_public.len()) .collect(); - let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]].into_iter().map(|g| g.map(ValidatorIndex)).collect(); + let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]] + .into_iter().map(|g| g.into_iter().map(ValidatorIndex).collect()).collect(); let group_rotation_info = GroupRotationInfo { session_start_block: 0, group_rotation_frequency: 100, @@ -737,7 +738,8 @@ fn we_inform_peers_with_same_view_we_are_awaiting() { .take(validators.len()) .collect(); - let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]].map(|g| g.map(ValidatorIndex)).collect(); + let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]] + .into_iter().map(|g| g.into_iter().map(ValidatorIndex).collect()).collect(); let group_rotation_info = GroupRotationInfo { session_start_block: 0, group_rotation_frequency: 100, diff --git a/runtime/parachains/src/inclusion.rs b/runtime/parachains/src/inclusion.rs index 1ed91b386817..d53f3240f6b6 100644 --- a/runtime/parachains/src/inclusion.rs +++ b/runtime/parachains/src/inclusion.rs @@ -1602,7 +1602,7 @@ mod tests { group_index if group_index == GroupIndex::from(1) => Some(vec![2, 3]), group_index if group_index == GroupIndex::from(2) => Some(vec![4]), _ => panic!("Group index out of bounds for 2 parachains and 1 parathread core"), - }.map(|m| m.map(ValidatorIndex)); + }.map(|m| m.into_iter().map(ValidatorIndex).collect::>()); let thread_collator: CollatorId = Sr25519Keyring::Two.public().into(); @@ -2089,7 +2089,7 @@ mod tests { group_index if group_index == GroupIndex::from(1) => 
Some(vec![2, 3]), group_index if group_index == GroupIndex::from(2) => Some(vec![4]), _ => panic!("Group index out of bounds for 2 parachains and 1 parathread core"), - }; + }.map(|vs| vs.into_iter().map(ValidatorIndex).collect::>()); let thread_collator: CollatorId = Sr25519Keyring::Two.public().into(); @@ -2284,7 +2284,7 @@ mod tests { let group_validators = |group_index: GroupIndex| match group_index { group_index if group_index == GroupIndex::from(0) => Some(vec![0, 1, 2, 3, 4]), _ => panic!("Group index out of bounds for 1 parachain"), - }; + }.map(|vs| vs.into_iter().map(ValidatorIndex).collect::>()); let chain_a_assignment = CoreAssignment { core: CoreIndex::from(0), @@ -2382,7 +2382,7 @@ mod tests { run_to_block(10, |_| None); >::insert( - &0, + &ValidatorIndex(0), AvailabilityBitfieldRecord { bitfield: default_bitfield(), submitted_at: 9, @@ -2390,7 +2390,7 @@ mod tests { ); >::insert( - &1, + &ValidatorIndex(1), AvailabilityBitfieldRecord { bitfield: default_bitfield(), submitted_at: 9, @@ -2398,7 +2398,7 @@ mod tests { ); >::insert( - &4, + &ValidatorIndex(4), AvailabilityBitfieldRecord { bitfield: default_bitfield(), submitted_at: 9, @@ -2435,9 +2435,9 @@ mod tests { assert_eq!(Validators::get(), validator_public); assert_eq!(shared::Module::::session_index(), 5); - assert!(>::get(&0).is_some()); - assert!(>::get(&1).is_some()); - assert!(>::get(&4).is_some()); + assert!(>::get(&ValidatorIndex(0)).is_some()); + assert!(>::get(&ValidatorIndex(1)).is_some()); + assert!(>::get(&ValidatorIndex(4)).is_some()); assert!(>::get(&chain_a).is_some()); assert!(>::get(&chain_b).is_some()); @@ -2459,9 +2459,9 @@ mod tests { assert_eq!(Validators::get(), validator_public_new); assert_eq!(shared::Module::::session_index(), 6); - assert!(>::get(&0).is_none()); - assert!(>::get(&1).is_none()); - assert!(>::get(&4).is_none()); + assert!(>::get(&ValidatorIndex(0)).is_none()); + assert!(>::get(&ValidatorIndex(1)).is_none()); + assert!(>::get(&ValidatorIndex(4)).is_none()); assert!(>::get(&chain_a).is_none()); assert!(>::get(&chain_b).is_none()); From 00e2f69058136bd96af7ec3bc013e4eec0c5c74d Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 22:10:41 +0100 Subject: [PATCH 33/60] Fix test. --- node/core/approval-voting/src/import.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/core/approval-voting/src/import.rs b/node/core/approval-voting/src/import.rs index 541c8ff4273d..78d0ecc863c5 100644 --- a/node/core/approval-voting/src/import.rs +++ b/node/core/approval-voting/src/import.rs @@ -1547,7 +1547,7 @@ mod tests { validators: vec![Sr25519Keyring::Alice.public().into(); 6], discovery_keys: Vec::new(), assignment_keys: Vec::new(), - validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(5)], vec![ValidatorIndex(0), ValidatorIndex(2)]], + validator_groups: vec![vec![ValidatorIndex(0); 5], vec![ValidatorIndex(0); 2]], n_cores: 6, needed_approvals: 2, zeroth_delay_tranche_width: irrelevant, From c837d98ff7de9a287d12cabd909196cbe89119d0 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 22:38:09 +0100 Subject: [PATCH 34/60] Cleanup + get rid of some warnings. 
--- node/network/availability-distribution/src/lib.rs | 9 ++++----- node/network/availability-distribution/src/requester.rs | 6 ++---- .../src/requester/fetch_task.rs | 4 ++-- node/network/availability-distribution/src/responder.rs | 2 +- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index ab68a899582b..4358511a62b8 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -42,15 +42,14 @@ mod session_cache; const LOG_TARGET: &'static str = "availability_distribution"; /// Availability Distribution metrics. -/// TODO: Dummy for now. type Metrics = (); /// The availability distribution subsystem. pub struct AvailabilityDistributionSubsystem { /// Pointer to a keystore, which is required for determining this nodes validator index. keystore: SyncCryptoStorePtr, - /// Prometheus metrics. - metrics: Metrics, + //// Prometheus metrics. + // metrics: Metrics, } impl Subsystem for AvailabilityDistributionSubsystem @@ -72,8 +71,8 @@ where impl AvailabilityDistributionSubsystem { /// Create a new instance of the availability distribution. - pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { - Self { keystore, metrics } + pub fn new(keystore: SyncCryptoStorePtr, _metrics: Metrics) -> Self { + Self { keystore } } /// Start processing work as passed on from the Overseer. diff --git a/node/network/availability-distribution/src/requester.rs b/node/network/availability-distribution/src/requester.rs index 799d149b2c10..8d033d474b46 100644 --- a/node/network/availability-distribution/src/requester.rs +++ b/node/network/availability-distribution/src/requester.rs @@ -57,8 +57,6 @@ pub struct Requester { fetches: HashMap, /// Localized information about sessions we are currently interested in. - /// - /// This is the current one and the last one. session_cache: SessionCache, /// Sender to be cloned for `FetchTask`s. @@ -73,7 +71,7 @@ impl Requester { /// /// You must feed it with `ActiveLeavesUpdate` via `update_fetching_heads` and make it progress /// by advancing the stream. - pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { + pub fn new(keystore: SyncCryptoStorePtr) -> Self { // All we do is forwarding messages, no need to make this big. let (tx, rx) = mpsc::channel(1); Requester { @@ -86,7 +84,7 @@ impl Requester { /// Update heads that need availability distribution. /// /// For all active heads we will be fetching our chunks for availabilty distribution. - pub(crate) async fn update_fetching_heads( + pub async fn update_fetching_heads( &mut self, ctx: &mut Context, update: ActiveLeavesUpdate, diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index f19dc98ba743..329f5051f0b4 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -58,7 +58,7 @@ pub struct FetchTask { /// stop keeping track of that candidate/chunk. live_in: HashSet, - /// We keep the task around in state `Fetched` until `live_in` becomes empty, to make + /// We keep the task around in until `live_in` becomes empty, to make /// sure we won't re-fetch an already fetched candidate. state: FetchedState, } @@ -80,7 +80,7 @@ pub enum FromFetchTask { /// Concluded with result. 
/// - /// In case of `None` everything was fine, in case of `Some` some validators in the group + /// In case of `None` everything was fine, in case of `Some`, some validators in the group /// did not serve us our chunk as expected. Concluded(Option), } diff --git a/node/network/availability-distribution/src/responder.rs b/node/network/availability-distribution/src/responder.rs index 23ec112030df..8208ed39c057 100644 --- a/node/network/availability-distribution/src/responder.rs +++ b/node/network/availability-distribution/src/responder.rs @@ -14,7 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -//! Responder answers requests for availability chunks. +//! Answer requests for availability chunks. use futures::channel::oneshot; From 8945fbb8a2d86bcfe5a45fd951267d6f2bc5e81e Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 19:07:51 +0100 Subject: [PATCH 35/60] state -> requester --- node/network/availability-distribution/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 4358511a62b8..3226d9d972d5 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -80,13 +80,13 @@ impl AvailabilityDistributionSubsystem { where Context: SubsystemContext + Sync + Send, { - let mut state = Requester::new(self.keystore.clone()).fuse(); + let mut requester = Requester::new(self.keystore.clone()).fuse(); loop { let action = { let mut subsystem_next = ctx.recv().fuse(); futures::select! { subsystem_msg = subsystem_next => Either::Left(subsystem_msg), - from_task = state.next() => Either::Right(from_task), + from_task = requester.next() => Either::Right(from_task), } }; @@ -104,7 +104,7 @@ impl AvailabilityDistributionSubsystem { match message { FromOverseer::Signal(OverseerSignal::ActiveLeaves(update)) => { // Update the relay chain heads we are fetching our pieces for: - state + requester .get_mut() .update_fetching_heads(&mut ctx, update) .await?; From c9984fbd6638cc5cfd87077a5db7072e3baee40a Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 19:43:07 +0100 Subject: [PATCH 36/60] Mostly doc fixes. --- .../src/requester.rs | 8 +++-- .../src/requester/fetch_task.rs | 11 +++++-- .../src/responder.rs | 1 + .../src/session_cache.rs | 29 +++++++++++-------- 4 files changed, 31 insertions(+), 18 deletions(-) diff --git a/node/network/availability-distribution/src/requester.rs b/node/network/availability-distribution/src/requester.rs index 8d033d474b46..4218321b4938 100644 --- a/node/network/availability-distribution/src/requester.rs +++ b/node/network/availability-distribution/src/requester.rs @@ -123,7 +123,7 @@ impl Requester { /// Stop requesting chunks for obsolete heads. 
/// fn stop_requesting_chunks(&mut self, obsolete_leaves: impl Iterator) { - let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().collect(); + let obsolete_leaves: HashSet<_> = obsolete_leaves.collect(); self.fetches.retain(|_, task| { task.remove_leaves(&obsolete_leaves); task.is_live() @@ -155,6 +155,7 @@ impl Requester { } Entry::Vacant(e) => { let tx = self.tx.clone(); + let task_cfg = self .session_cache .with_session_info( @@ -163,6 +164,7 @@ impl Requester { |info| FetchTaskConfig::new(leaf, &core, tx, info), ) .await?; + if let Some(task_cfg) = task_cfg { e.insert(FetchTask::start(task_cfg, ctx).await?); } @@ -200,8 +202,8 @@ impl Stream for Requester { } } -///// Query all hashes and descriptors of candidates pending availability at a particular block. -// #[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] +/// Query all hashes and descriptors of candidates pending availability at a particular block. +#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] async fn query_occupied_cores( ctx: &mut Context, relay_parent: Hash, diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 329f5051f0b4..19315eab010e 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -50,6 +50,7 @@ pub struct FetchTaskConfig { live_in: HashSet, } +/// Information about a task fetching an erasure chunk. pub struct FetchTask { /// For what relay parents this task is relevant. /// @@ -90,7 +91,9 @@ struct RunningTask { /// For what session we have been spawned. session_index: SessionIndex, - /// Index of validator group. + /// Index of validator group to fetch the chunk from. + /// + /// Needef for reporting bad validators. group_index: GroupIndex, /// Validators to request the chunk from. @@ -134,7 +137,9 @@ impl FetchTaskConfig { let prepared_running = RunningTask { session_index: session_info.session_index, group_index: core.group_responsible, - group: session_info.validator_groups.get(core.group_responsible.0 as usize).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), + group: session_info.validator_groups.get(core.group_responsible.0 as usize) + .expect("The responsible group of a candidate should be available in the corresponding session. qed.") + .clone(), request: AvailabilityFetchingRequest { candidate_hash: core.candidate_hash, index: session_info.our_index, @@ -184,7 +189,7 @@ impl FetchTask { /// Add the given leaf to the relay parents which are making this task relevant. /// - /// This is for book keeping, so we know we are already fetching a chunk. + /// This is for book keeping, so we know we are already fetching a given chunk. pub fn add_leaf(&mut self, leaf: Hash) { self.live_in.insert(leaf); } diff --git a/node/network/availability-distribution/src/responder.rs b/node/network/availability-distribution/src/responder.rs index 8208ed39c057..1d6e886edf80 100644 --- a/node/network/availability-distribution/src/responder.rs +++ b/node/network/availability-distribution/src/responder.rs @@ -46,6 +46,7 @@ where req.send_response(response).map_err(|_| Error::SendResponse) } +/// Query chunk from the availability store. 
#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] async fn query_chunk( ctx: &mut Context, diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 874671fbefcb..d10d59b7cc01 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -40,7 +40,6 @@ use super::{ /// Caching of session info as needed by availability distribution. /// /// It should be ensured that a cached session stays live in the cache as long as we might need it. -/// A warning will be logged, if an already dead entry gets fetched. pub struct SessionCache { /// Get the session index for a given relay parent. /// @@ -52,7 +51,8 @@ pub struct SessionCache { /// /// Note: Performance of fetching is really secondary here, but we need to ensure we are going /// to get any existing cache entry, before fetching new information, as we should not mess up - /// the order of validators. (We want live TCP connections wherever possible.) + /// the order of validators in `SessionInfo::validator_groups`. (We want live TCP connections + /// wherever possible.) session_info_cache: LruCache, /// Key store for determining whether we are a validator and what `ValidatorIndex` we have. @@ -64,32 +64,38 @@ pub struct SessionCache { pub struct SessionInfo { /// The index of this session. pub session_index: SessionIndex, + /// Validator groups of the current session. /// /// Each group's order is randomized. This way we achieve load balancing when requesting /// chunks, as the validators in a group will be tried in that randomized order. Each node - /// should arrive at a different order, therefore we distribute the load. + /// should arrive at a different order, therefore we distribute the load on individual + /// validators. pub validator_groups: Vec>, /// Information about ourself: pub our_index: ValidatorIndex, - /// Remember to which group we belong, so we won't start fetching chunks for candidates those - /// candidates (We should have them via PoV distribution). + /// Remember to which group we belong, so we won't start fetching chunks for candidates with + /// our group being responsible. (We should have that chunk already.) pub our_group: GroupIndex, } /// Report of bad validators. +/// +/// Fetching tasks will report back validators that did not respond as expected, so we can re-order +/// them. pub struct BadValidators { /// The session index that was used. pub session_index: SessionIndex, - /// The group the not properly responding validators are. + /// The group, the not properly responding validators belong to. pub group_index: GroupIndex, - /// The indeces of the bad validators. + /// The list of bad validators. pub bad_validators: Vec, } impl SessionCache { + /// Create a new `SessionCache`. pub fn new(keystore: SyncCryptoStorePtr) -> Self { SessionCache { // 5 relatively conservative, 1 to 2 should suffice: @@ -104,7 +110,7 @@ impl SessionCache { /// /// If this node is not a validator, the function will return `None`. /// - /// Use this function over `fetch_session_info` if all you need is a reference to + /// Use this function over any `fetch_session_info` if all you need is a reference to /// `SessionInfo`, as it avoids an expensive clone. pub async fn with_session_info( &mut self, @@ -170,7 +176,7 @@ impl SessionCache { /// Query needed information from runtime. 
/// /// We need to pass in the relay parent for our call to `request_session_info_ctx`. We should - /// actually don't need that, I suppose it is used for internal caching based on relay parents, + /// actually don't need that: I suppose it is used for internal caching based on relay parents, /// which we don't use here. It should not do any harm though. async fn query_info_from_runtime( &self, @@ -204,7 +210,6 @@ impl SessionCache { } }) }) - // TODO: Make sure this is correct and should be enforced: .expect("Every validator should be in a validator group. qed."); // Shuffle validators in groups: @@ -237,9 +242,9 @@ impl SessionCache { return Ok(None); } - /// Get our validator id and the validators in the current session. + /// Get our `ValidatorIndex`. /// - /// Returns: Ok(None) if we are not a validator. + /// Returns: None if we are not a validator. async fn get_our_index(&self, validators: Vec) -> Option { for (i, v) in validators.iter().enumerate() { if CryptoStore::has_keys(&*self.keystore, &[(v.to_raw_vec(), ValidatorId::ID)]) From 770775949db41c9a4cb0ff8884c070b07b76f7bc Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 20:06:35 +0100 Subject: [PATCH 37/60] Fix test suite. --- node/network/availability-distribution/src/requester.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/network/availability-distribution/src/requester.rs b/node/network/availability-distribution/src/requester.rs index 4218321b4938..f6fe101b500b 100644 --- a/node/network/availability-distribution/src/requester.rs +++ b/node/network/availability-distribution/src/requester.rs @@ -39,7 +39,7 @@ use polkadot_subsystem::{ messages::AllMessages, ActiveLeavesUpdate, jaeger, SubsystemContext, }; -use super::{error::recv_runtime, session_cache::SessionCache, Result}; +use super::{error::recv_runtime, session_cache::SessionCache, Result, LOG_TARGET}; /// A task fetching a particular chunk. mod fetch_task; From e7623d4df62d356db88c303dc2631aad08b6adf2 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 21:04:02 +0100 Subject: [PATCH 38/60] Get rid of now redundant message types. --- .../availability-distribution/src/lib.rs | 5 ----- node/network/bridge/src/lib.rs | 17 +++-------------- node/network/protocol/src/lib.rs | 12 ------------ node/subsystem/src/messages.rs | 3 --- 4 files changed, 3 insertions(+), 34 deletions(-) diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 3226d9d972d5..9912c4c85242 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -118,11 +118,6 @@ impl AvailabilityDistributionSubsystem { } => { answer_request(&mut ctx, req).await? } - FromOverseer::Communication { - msg: AvailabilityDistributionMessage::NetworkBridgeUpdateV1(_), - } => { - // There are currently no bridge updates we are interested in. 
- } } } } diff --git a/node/network/bridge/src/lib.rs b/node/network/bridge/src/lib.rs index 720cea102168..309b560bb7e2 100644 --- a/node/network/bridge/src/lib.rs +++ b/node/network/bridge/src/lib.rs @@ -28,8 +28,8 @@ use polkadot_subsystem::{ SubsystemResult, jaeger, }; use polkadot_subsystem::messages::{ - NetworkBridgeMessage, AllMessages, AvailabilityDistributionMessage, - BitfieldDistributionMessage, PoVDistributionMessage, StatementDistributionMessage, + NetworkBridgeMessage, AllMessages, BitfieldDistributionMessage, + PoVDistributionMessage, StatementDistributionMessage, CollatorProtocolMessage, ApprovalDistributionMessage, NetworkBridgeEvent, }; use polkadot_primitives::v1::{Hash, BlockNumber}; @@ -567,10 +567,6 @@ async fn dispatch_validation_events_to_all( I::IntoIter: Send, { let messages_for = |event: NetworkBridgeEvent| { - let a = std::iter::once(event.focus().ok().map(|m| AllMessages::AvailabilityDistribution( - AvailabilityDistributionMessage::NetworkBridgeUpdateV1(m) - ))); - let b = std::iter::once(event.focus().ok().map(|m| AllMessages::BitfieldDistribution( BitfieldDistributionMessage::NetworkBridgeUpdateV1(m) ))); @@ -587,7 +583,7 @@ async fn dispatch_validation_events_to_all( ApprovalDistributionMessage::NetworkBridgeUpdateV1(m) ))); - a.chain(b).chain(p).chain(s).chain(ap).filter_map(|x| x) + b.chain(p).chain(s).chain(ap).filter_map(|x| x) }; ctx.send_messages(events.into_iter().flat_map(messages_for)).await @@ -817,13 +813,6 @@ mod tests { event: NetworkBridgeEvent, virtual_overseer: &mut TestSubsystemContextHandle, ) { - assert_matches!( - virtual_overseer.recv().await, - AllMessages::AvailabilityDistribution( - AvailabilityDistributionMessage::NetworkBridgeUpdateV1(e) - ) if e == event.focus().expect("could not focus message") - ); - assert_matches!( virtual_overseer.recv().await, AllMessages::BitfieldDistribution( diff --git a/node/network/protocol/src/lib.rs b/node/network/protocol/src/lib.rs index 2f7547f2de46..3d5d8351bb68 100644 --- a/node/network/protocol/src/lib.rs +++ b/node/network/protocol/src/lib.rs @@ -252,14 +252,6 @@ pub mod v1 { use super::RequestId; use std::convert::TryFrom; - /// Network messages used by the availability distribution subsystem - #[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)] - pub enum AvailabilityDistributionMessage { - /// An erasure chunk for a given candidate hash. - #[codec(index = 0)] - Chunk(CandidateHash, ErasureChunk), - } - /// Network messages used by the availability recovery subsystem. #[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)] pub enum AvailabilityRecoveryMessage { @@ -408,9 +400,6 @@ pub mod v1 { /// All network messages on the validation peer-set. 
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)] pub enum ValidationProtocol { - /// Availability distribution messages - #[codec(index = 0)] - AvailabilityDistribution(AvailabilityDistributionMessage), /// Bitfield distribution messages #[codec(index = 1)] BitfieldDistribution(BitfieldDistributionMessage), @@ -428,7 +417,6 @@ pub mod v1 { ApprovalDistribution(ApprovalDistributionMessage), } - impl_try_from!(ValidationProtocol, AvailabilityDistribution, AvailabilityDistributionMessage); impl_try_from!(ValidationProtocol, BitfieldDistribution, BitfieldDistributionMessage); impl_try_from!(ValidationProtocol, PoVDistribution, PoVDistributionMessage); impl_try_from!(ValidationProtocol, StatementDistribution, StatementDistributionMessage); diff --git a/node/subsystem/src/messages.rs b/node/subsystem/src/messages.rs index 857b1e90ae96..629b25df881e 100644 --- a/node/subsystem/src/messages.rs +++ b/node/subsystem/src/messages.rs @@ -269,8 +269,6 @@ impl NetworkBridgeMessage { /// Availability Distribution Message. #[derive(Debug, derive_more::From)] pub enum AvailabilityDistributionMessage { - /// Event from the network bridge. - NetworkBridgeUpdateV1(NetworkBridgeEvent), /// Incoming request for an availability chunk. AvailabilityFetchingRequest(IncomingRequest) } @@ -293,7 +291,6 @@ impl AvailabilityDistributionMessage { /// If the current variant contains the relay parent hash, return it. pub fn relay_parent(&self) -> Option { match self { - Self::NetworkBridgeUpdateV1(_) => None, Self::AvailabilityFetchingRequest(_) => None, } } From e8d7e44cd3c95412d34018593ac038522d8037b8 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 22:31:24 +0100 Subject: [PATCH 39/60] WIP --- node/network/availability-distribution/src/lib.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 9912c4c85242..d8389d4933d2 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -39,17 +39,18 @@ use responder::answer_request; /// Cache for session information. mod session_cache; -const LOG_TARGET: &'static str = "availability_distribution"; +mod metrics; +/// Prometheus `Metrics` for availability distribution. +pub use metrics::Metrics; -/// Availability Distribution metrics. -type Metrics = (); +const LOG_TARGET: &'static str = "availability_distribution"; /// The availability distribution subsystem. pub struct AvailabilityDistributionSubsystem { /// Pointer to a keystore, which is required for determining this nodes validator index. keystore: SyncCryptoStorePtr, - //// Prometheus metrics. - // metrics: Metrics, + /// Prometheus metrics. + metrics: Metrics, } impl Subsystem for AvailabilityDistributionSubsystem @@ -71,8 +72,8 @@ where impl AvailabilityDistributionSubsystem { /// Create a new instance of the availability distribution. - pub fn new(keystore: SyncCryptoStorePtr, _metrics: Metrics) -> Self { - Self { keystore } + pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { + Self { keystore, metrics } } /// Start processing work as passed on from the Overseer. From 5fb84180e0f2a2db1f4ed27c94683d3609fb02c0 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 22:18:46 +0100 Subject: [PATCH 40/60] Rob's review remarks. 
--- .../availability-distribution/src/requester/fetch_task.rs | 2 +- node/network/availability-distribution/src/session_cache.rs | 3 ++- node/network/protocol/src/request_response/v1.rs | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 19315eab010e..05f70ec5a8cb 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -262,7 +262,7 @@ impl RunningTask { }; let chunk = match resp { AvailabilityFetchingResponse::Chunk(resp) => { - resp.reconstruct_erasure_chunk(&self.request) + resp.recombine_into_chunk(&self.request) } AvailabilityFetchingResponse::NoSuchChunk => { tracing::debug!(target: LOG_TARGET, validator = ?validator, "Validator did not have our chunk"); diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index d10d59b7cc01..672ada0896bc 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -225,7 +225,8 @@ impl SessionCache { .into_iter() .map(|index| { discovery_keys.get(index.0 as usize) - .expect("There should be a discovery key for each validator of each validator group. qed.").clone() + .expect("There should be a discovery key for each validator of each validator group. qed.") + .clone() }) .collect() }) diff --git a/node/network/protocol/src/request_response/v1.rs b/node/network/protocol/src/request_response/v1.rs index 22724c1f44b7..4f8c968b8fd5 100644 --- a/node/network/protocol/src/request_response/v1.rs +++ b/node/network/protocol/src/request_response/v1.rs @@ -59,13 +59,13 @@ pub struct ChunkResponse { impl From for ChunkResponse { fn from(ErasureChunk {chunk, index: _, proof}: ErasureChunk) -> Self { - ChunkResponse { chunk, proof} + ChunkResponse {chunk, proof} } } impl ChunkResponse { /// Re-build an `ErasureChunk` from response and request. - pub fn reconstruct_erasure_chunk(self, req: &AvailabilityFetchingRequest) -> ErasureChunk { + pub fn recombine_into_chunk(self, req: &AvailabilityFetchingRequest) -> ErasureChunk { ErasureChunk { chunk: self.chunk, proof: self.proof, From 9780f3a3c8329a73bb8a585ebdf3fb16cd9e270f Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 23:13:50 +0100 Subject: [PATCH 41/60] Fix test suite. 
--- node/overseer/src/lib.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/node/overseer/src/lib.rs b/node/overseer/src/lib.rs index cd9f1d4f1d89..da0ebca25ed6 100644 --- a/node/overseer/src/lib.rs +++ b/node/overseer/src/lib.rs @@ -2717,10 +2717,6 @@ mod tests { StatementDistributionMessage::NetworkBridgeUpdateV1(test_network_bridge_event()) } - fn test_availability_distribution_msg() -> AvailabilityDistributionMessage { - AvailabilityDistributionMessage::NetworkBridgeUpdateV1(test_network_bridge_event()) - } - fn test_availability_recovery_msg() -> AvailabilityRecoveryMessage { let (sender, _) = oneshot::channel(); AvailabilityRecoveryMessage::RecoverAvailableData( @@ -2828,7 +2824,6 @@ mod tests { handler.send_msg(AllMessages::CollationGeneration(test_collator_generation_msg())).await; handler.send_msg(AllMessages::CollatorProtocol(test_collator_protocol_msg())).await; handler.send_msg(AllMessages::StatementDistribution(test_statement_distribution_msg())).await; - handler.send_msg(AllMessages::AvailabilityDistribution(test_availability_distribution_msg())).await; handler.send_msg(AllMessages::AvailabilityRecovery(test_availability_recovery_msg())).await; // handler.send_msg(AllMessages::BitfieldSigning(test_bitfield_signing_msg())).await; handler.send_msg(AllMessages::BitfieldDistribution(test_bitfield_distribution_msg())).await; @@ -2851,8 +2846,8 @@ mod tests { assert_eq!(stop_signals_received.load(atomic::Ordering::SeqCst), NUM_SUBSYSTEMS); // x2 because of broadcast_signal on startup assert_eq!(signals_received.load(atomic::Ordering::SeqCst), NUM_SUBSYSTEMS); - // -1 for BitfieldSigning - assert_eq!(msgs_received.load(atomic::Ordering::SeqCst), NUM_SUBSYSTEMS - 1); + // -2 for BitfieldSigning and Availability distribution + assert_eq!(msgs_received.load(atomic::Ordering::SeqCst), NUM_SUBSYSTEMS - 2); assert!(res.is_ok()); }, From 5bbcea45cd0a5401e2f91395797f585005605c05 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 23:13:58 +0100 Subject: [PATCH 42/60] core.relay_parent -> leaf for session request. --- node/network/availability-distribution/src/requester.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/node/network/availability-distribution/src/requester.rs b/node/network/availability-distribution/src/requester.rs index f6fe101b500b..1a080853333c 100644 --- a/node/network/availability-distribution/src/requester.rs +++ b/node/network/availability-distribution/src/requester.rs @@ -160,7 +160,10 @@ impl Requester { .session_cache .with_session_info( ctx, - core.candidate_descriptor.relay_parent, + // We use leaf here, as relay_parent must be in the same session as the + // leaf. (Cores are dropped at session boundaries.) At the same time, + // only leaves are guaranteed to be fetchable by the state trie. + leaf, |info| FetchTaskConfig::new(leaf, &core, tx, info), ) .await?; From b792a89ed343cc4276d28ed39a7c04fe1d9b9ef2 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 23:21:20 +0100 Subject: [PATCH 43/60] Style fix. 
--- .../src/requester/fetch_task.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 05f70ec5a8cb..6b2612096a76 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -335,11 +335,11 @@ impl RunningTask { Ok(hash) => hash, Err(e) => { tracing::trace!( - target: LOG_TARGET, - candidate_hash = ?self.request.candidate_hash, - origin = ?validator, - error = ?e, - "Failed to calculate chunk merkle proof", + target: LOG_TARGET, + candidate_hash = ?self.request.candidate_hash, + origin = ?validator, + error = ?e, + "Failed to calculate chunk merkle proof", ); return false; } From 75e6af8728be9b0f8287557114b2f2f3d2a78ad5 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 23:26:59 +0100 Subject: [PATCH 44/60] Decrease request timeout. --- node/network/protocol/src/request_response.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/node/network/protocol/src/request_response.rs b/node/network/protocol/src/request_response.rs index 7d30fe76358f..2160d1905cab 100644 --- a/node/network/protocol/src/request_response.rs +++ b/node/network/protocol/src/request_response.rs @@ -60,7 +60,11 @@ pub enum Protocol { } /// Default request timeout in seconds. -const DEFAULT_REQUEST_TIMEOUT: u64 = 8; +/// +/// When decreasing this value, take into account that the very first request might need to open a +/// connection, which can be slow. If this causes problems, we should ensure connectivity via peer +/// sets. +const DEFAULT_REQUEST_TIMEOUT: u64 = 3; impl Protocol { /// Get a configuration for a given Request response protocol. From 53fdeb34d067d6ab824e1331348369798f798160 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 18:56:54 +0100 Subject: [PATCH 45/60] Cleanup obsolete errors. --- node/network/availability-distribution/src/error.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index f66d418d8e35..354e9c255e32 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -28,15 +28,9 @@ use polkadot_subsystem::{errors::RuntimeApiError, SubsystemError}; /// Errors of this subsystem. #[derive(Debug, Error)] pub enum Error { - #[error("Response channel to obtain StoreChunk failed")] - StoreChunkResponseChannel(#[source] oneshot::Canceled), - #[error("Response channel to obtain QueryChunk failed")] QueryChunkResponseChannel(#[source] oneshot::Canceled), - #[error("Response channel to obtain AvailabilityCores failed")] - QueryAvailabilityResponseChannel(#[source] oneshot::Canceled), - #[error("Receive channel closed")] IncomingMessageChannel(#[source] SubsystemError), From ce21a10bb1479f35b145ac35eaf30477399d0e48 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 19:00:30 +0100 Subject: [PATCH 46/60] Metrics + don't fail on non fatal errors. 
--- .../availability-distribution/src/lib.rs | 8 +- .../availability-distribution/src/metrics.rs | 112 ++++++++++++++++++ .../src/requester.rs | 35 +++--- .../src/requester/fetch_task.rs | 23 +++- .../src/responder.rs | 37 +++++- .../src/session_cache.rs | 15 +++ 6 files changed, 207 insertions(+), 23 deletions(-) create mode 100644 node/network/availability-distribution/src/metrics.rs diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index d8389d4933d2..4e8683a0920e 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -34,7 +34,7 @@ use requester::Requester; /// Responding to erasure chunk requests: mod responder; -use responder::answer_request; +use responder::answer_request_log; /// Cache for session information. mod session_cache; @@ -81,7 +81,7 @@ impl AvailabilityDistributionSubsystem { where Context: SubsystemContext + Sync + Send, { - let mut requester = Requester::new(self.keystore.clone()).fuse(); + let mut requester = Requester::new(self.keystore.clone(), self.metrics.clone()).fuse(); loop { let action = { let mut subsystem_next = ctx.recv().fuse(); @@ -97,7 +97,7 @@ impl AvailabilityDistributionSubsystem { subsystem_msg.map_err(|e| Error::IncomingMessageChannel(e))? } Either::Right(from_task) => { - let from_task = from_task.ok_or(Error::RequesterExhausted)??; + let from_task = from_task.ok_or(Error::RequesterExhausted)?; ctx.send_message(from_task).await; continue; } @@ -117,7 +117,7 @@ impl AvailabilityDistributionSubsystem { FromOverseer::Communication { msg: AvailabilityDistributionMessage::AvailabilityFetchingRequest(req), } => { - answer_request(&mut ctx, req).await? + answer_request_log(&mut ctx, req, &self.metrics).await } } } diff --git a/node/network/availability-distribution/src/metrics.rs b/node/network/availability-distribution/src/metrics.rs new file mode 100644 index 000000000000..1e44028a1ef9 --- /dev/null +++ b/node/network/availability-distribution/src/metrics.rs @@ -0,0 +1,112 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use polkadot_node_subsystem_util::metrics::prometheus::{Counter, U64, Registry, PrometheusError, CounterVec, Opts}; +use polkadot_node_subsystem_util::metrics::prometheus; +use polkadot_node_subsystem_util::metrics; + +/// Label for success counters. +pub const SUCCEEDED: &'static str = "succeeded"; + +/// Label for fail counters. +pub const FAILED: &'static str = "failed"; + +/// Label for chunks that could not be served, because they were not available. +pub const NOT_FOUND: &'static str = "not-found"; + +/// Availability Distribution metrics. +#[derive(Clone, Default)] +pub struct Metrics(Option); + + +#[derive(Clone)] +struct MetricsInner { + /// Number of chunks fetched. 
+ /// + /// Note: The failed count gets incremented, when we were not able to fetch the chunk at all. + /// For times, where we failed downloading, but succeeded on the next try (with different + /// backers), see `retries`. + fetched_chunks: CounterVec, + + /// Number of chunks served. + /// + /// Note: Right now, `Succeeded` gets incremented whenever we were able to successfully respond + /// to a chunk request. This includes `NoSuchChunk` responses. + served_chunks: CounterVec, + + /// Number of times our first set of validators did not provide the needed chunk and we had to + /// query further validators. + retries: Counter, +} + +impl Metrics { + /// Increment counter on fetched labels. + pub fn on_fetch(&self, label: &'static str) { + if let Some(metrics) = &self.0 { + metrics.fetched_chunks.with_label_values(&[label]).inc() + } + } + + /// Increment counter on served chunks. + pub fn on_served(&self, label: &'static str) { + if let Some(metrics) = &self.0 { + metrics.served_chunks.with_label_values(&[label]).inc() + } + } + + /// Increment retry counter. + pub fn on_retry(&self) { + if let Some(metrics) = &self.0 { + metrics.retries.inc() + } + } +} + +impl metrics::Metrics for Metrics { + fn try_register(registry: &Registry) -> Result { + let metrics = MetricsInner { + fetched_chunks: prometheus::register( + CounterVec::new( + Opts::new( + "Number of fetched chunks", + "Total number of fetched chunks.", + ), + &[FAILED, SUCCEEDED] + )?, + registry, + )?, + served_chunks: prometheus::register( + CounterVec::new( + Opts::new( + "Number of served chunks", + "Total number of chunks served by this backer.", + ), + &[FAILED, SUCCEEDED, NOT_FOUND] + )?, + registry, + )?, + retries: prometheus::register( + Counter::new( + "Number of retries", + "Number of times we did not succeed in fetching a chunk and needed to try more backers.", + )?, + registry, + )?, + }; + Ok(Metrics(Some(metrics))) + } +} + diff --git a/node/network/availability-distribution/src/requester.rs b/node/network/availability-distribution/src/requester.rs index 1a080853333c..a9b95b813cde 100644 --- a/node/network/availability-distribution/src/requester.rs +++ b/node/network/availability-distribution/src/requester.rs @@ -39,7 +39,7 @@ use polkadot_subsystem::{ messages::AllMessages, ActiveLeavesUpdate, jaeger, SubsystemContext, }; -use super::{error::recv_runtime, session_cache::SessionCache, Result, LOG_TARGET}; +use super::{error::recv_runtime, session_cache::SessionCache, Result, LOG_TARGET, Metrics}; /// A task fetching a particular chunk. mod fetch_task; @@ -64,6 +64,9 @@ pub struct Requester { /// Receive messages from `FetchTask`. rx: mpsc::Receiver, + + /// Prometheus Metrics + metrics: Metrics, } impl Requester { @@ -71,7 +74,7 @@ impl Requester { /// /// You must feed it with `ActiveLeavesUpdate` via `update_fetching_heads` and make it progress /// by advancing the stream. - pub fn new(keystore: SyncCryptoStorePtr) -> Self { + pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { // All we do is forwarding messages, no need to make this big. let (tx, rx) = mpsc::channel(1); Requester { @@ -79,6 +82,7 @@ impl Requester { session_cache: SessionCache::new(keystore), tx, rx, + metrics, } } /// Update heads that need availability distribution. @@ -155,6 +159,7 @@ impl Requester { } Entry::Vacant(e) => { let tx = self.tx.clone(); + let metrics = self.metrics.clone(); let task_cfg = self .session_cache @@ -164,7 +169,7 @@ impl Requester { // leaf. (Cores are dropped at session boundaries.) 
At the same time, // only leaves are guaranteed to be fetchable by the state trie. leaf, - |info| FetchTaskConfig::new(leaf, &core, tx, info), + |info| FetchTaskConfig::new(leaf, &core, tx, metrics, info), ) .await?; @@ -180,26 +185,26 @@ impl Requester { } impl Stream for Requester { - type Item = Result; + type Item = AllMessages; fn poll_next( mut self: Pin<&mut Self>, ctx: &mut Context, - ) -> Poll>> { + ) -> Poll> { loop { match Pin::new(&mut self.rx).poll_next(ctx) { - Poll::Ready(Some(FromFetchTask::Message(m))) => { - return Poll::Ready(Some(Ok(m))) - } + Poll::Ready(Some(FromFetchTask::Message(m))) => + return Poll::Ready(Some(m)), Poll::Ready(Some(FromFetchTask::Concluded(Some(bad_boys)))) => { - match self.session_cache.report_bad(bad_boys) { - Err(err) => return Poll::Ready(Some(Err(err))), - Ok(()) => continue, - } + self.session_cache.report_bad_log(bad_boys); + continue } - Poll::Ready(Some(FromFetchTask::Concluded(None))) => continue, - Poll::Ready(None) => return Poll::Ready(None), - Poll::Pending => return Poll::Pending, + Poll::Ready(Some(FromFetchTask::Concluded(None))) => + continue, + Poll::Ready(None) => + return Poll::Ready(None), + Poll::Pending => + return Poll::Pending, } } } diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 6b2612096a76..10d0821a23d0 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -39,6 +39,7 @@ use crate::{ error::{Error, Result}, session_cache::{BadValidators, SessionInfo}, LOG_TARGET, + metrics::{Metrics, SUCCEEDED, FAILED}, }; /// Configuration for a `FetchTask` @@ -112,6 +113,9 @@ struct RunningTask { /// Sender for communicating with other subsystems and reporting results. sender: mpsc::Sender, + + /// Prometheues metrics for reporting results. + metrics: Metrics, } impl FetchTaskConfig { @@ -122,6 +126,7 @@ impl FetchTaskConfig { leaf: Hash, core: &OccupiedCore, sender: mpsc::Sender, + metrics: Metrics, session_info: &SessionInfo, ) -> Self { let live_in = vec![leaf].into_iter().collect(); @@ -146,6 +151,7 @@ impl FetchTaskConfig { }, erasure_root: core.candidate_descriptor.erasure_root, relay_parent: core.candidate_descriptor.relay_parent, + metrics, sender, }; FetchTaskConfig { @@ -243,8 +249,16 @@ impl RunningTask { /// Try validators in backing group in order. 
async fn run_inner(mut self) { let mut bad_validators = Vec::new(); + let mut label = FAILED; + let mut count: u32 = 0; // Try validators in reverse order: while let Some(validator) = self.group.pop() { + // Report retries: + if count > 0 { + self.metrics.on_retry(); + } + count +=1; + // Send request: let resp = match self.do_request(&validator).await { Ok(resp) => resp, @@ -253,6 +267,7 @@ impl RunningTask { target: LOG_TARGET, "Node seems to be shutting down, canceling fetch task" ); + self.metrics.on_fetch(FAILED); return } Err(TaskError::PeerError) => { @@ -265,7 +280,11 @@ impl RunningTask { resp.recombine_into_chunk(&self.request) } AvailabilityFetchingResponse::NoSuchChunk => { - tracing::debug!(target: LOG_TARGET, validator = ?validator, "Validator did not have our chunk"); + tracing::debug!( + target: LOG_TARGET, + validator = ?validator, + "Validator did not have our chunk" + ); bad_validators.push(validator); continue } @@ -279,8 +298,10 @@ impl RunningTask { // Ok, let's store it and be happy: self.store_chunk(chunk).await; + label = SUCCEEDED; break; } + self.metrics.on_fetch(label); self.conclude(bad_validators).await; } diff --git a/node/network/availability-distribution/src/responder.rs b/node/network/availability-distribution/src/responder.rs index 1d6e886edf80..c094b17fd666 100644 --- a/node/network/availability-distribution/src/responder.rs +++ b/node/network/availability-distribution/src/responder.rs @@ -26,24 +26,55 @@ use polkadot_subsystem::{ }; use crate::error::{Error, Result}; -use crate::LOG_TARGET; +use crate::{LOG_TARGET, metrics::{Metrics, SUCCEEDED, FAILED, NOT_FOUND}}; + +/// Variant of `answer_request` that does Prometheus metric and logging on errors. +/// +/// Any errors of `answer_request` will simply be logged. +pub async fn answer_request_log( + ctx: &mut Context, + req: IncomingRequest, + metrics: &Metrics, +) -> () +where + Context: SubsystemContext, +{ + let res = answer_request(ctx, req).await; + match res { + Ok(result) => + metrics.on_served(if result {SUCCEEDED} else {NOT_FOUND}), + Err(err) => { + tracing::warn!( + target: LOG_TARGET, + err= ?err, + "Serving chunk failed with error" + ); + metrics.on_served(FAILED); + } + } +} /// Answer an incoming chunk request by querying the av store. +/// +/// Returns: Ok(true) if chunk was found and served. pub async fn answer_request( ctx: &mut Context, req: IncomingRequest, -) -> Result<()> +) -> Result where Context: SubsystemContext, { let chunk = query_chunk(ctx, req.payload.candidate_hash, req.payload.index).await?; + let result = chunk.is_some(); + let response = match chunk { None => v1::AvailabilityFetchingResponse::NoSuchChunk, Some(chunk) => v1::AvailabilityFetchingResponse::Chunk(chunk.into()), }; - req.send_response(response).map_err(|_| Error::SendResponse) + req.send_response(response).map_err(|_| Error::SendResponse)?; + Ok(result) } /// Query chunk from the availability store. diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 672ada0896bc..d3081f35268a 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -35,6 +35,7 @@ use polkadot_subsystem::SubsystemContext; use super::{ error::{recv_runtime, Result}, Error, + LOG_TARGET, }; /// Caching of session info as needed by availability distribution. 
@@ -148,6 +149,20 @@ impl SessionCache { Ok(None) } + /// Variant of `report_bad` that never fails, but just logs errors. + /// + /// Not being able to report bad validators is not fatal, so we should not shutdown the + /// subsystem on this. + pub fn report_bad_log(&mut self, report: BadValidators) { + if let Err(err) = self.report_bad(report) { + tracing::warn!( + target: LOG_TARGET, + err= ?err, + "Reporting bad validators failed with error" + ); + } + } + /// Make sure we try unresponsive or misbehaving validators last. /// /// We assume validators in a group are tried in reverse order, so the reported bad validators From 64d72469b827a59f548275724996229b3186190f Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 19:03:07 +0100 Subject: [PATCH 47/60] requester.rs -> requester/mod.rs --- .../src/{requester.rs => requester/mod.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename node/network/availability-distribution/src/{requester.rs => requester/mod.rs} (100%) diff --git a/node/network/availability-distribution/src/requester.rs b/node/network/availability-distribution/src/requester/mod.rs similarity index 100% rename from node/network/availability-distribution/src/requester.rs rename to node/network/availability-distribution/src/requester/mod.rs From 2a9650f58f1233a86e5a4d6344484bdfa5a907b1 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 19:35:04 +0100 Subject: [PATCH 48/60] Panic on invalid BadValidator report. --- node/network/availability-distribution/src/error.rs | 6 +++--- node/network/availability-distribution/src/session_cache.rs | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index 354e9c255e32..dbe3ad56db16 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -54,9 +54,9 @@ pub enum Error { #[error("Spawning subsystem task failed")] SpawnTask(#[source] SubsystemError), - /// Reporting bad validators failed. - #[error("Reporting bad validators failed")] - ReportBadValidators(&'static str), + /// We tried accessing a session that was not cached. + #[error("Session is not cached.")] + NoSuchCachedSession, /// Requester stream exhausted. #[error("Erasure chunk requester stream exhausted")] diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index d3081f35268a..395d2ae78384 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -167,15 +167,16 @@ impl SessionCache { /// /// We assume validators in a group are tried in reverse order, so the reported bad validators /// will be put at the beginning of the group. + #[tracing::instrument(level = "trace", skip(self, report), fields(subsystem = LOG_TARGET))] pub fn report_bad(&mut self, report: BadValidators) -> Result<()> { let session = self .session_info_cache .get_mut(&report.session_index) - .ok_or(Error::ReportBadValidators("Session is not cached."))?; + .ok_or(Error::NoSuchCachedSession)?; let group = session .validator_groups .get_mut(report.group_index.0 as usize) - .ok_or(Error::ReportBadValidators("Validator group not found"))?; + .expect("A bad validator report must contain a valid group for the reported session. 
qed."); let bad_set = report.bad_validators.iter().collect::>(); // Get rid of bad boys: From 4d05d008adfd71dff9255d7b5b832134368eac77 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 19:38:56 +0100 Subject: [PATCH 49/60] Fix indentation. --- node/network/availability-distribution/src/metrics.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/node/network/availability-distribution/src/metrics.rs b/node/network/availability-distribution/src/metrics.rs index 1e44028a1ef9..a70efd76071a 100644 --- a/node/network/availability-distribution/src/metrics.rs +++ b/node/network/availability-distribution/src/metrics.rs @@ -34,18 +34,18 @@ pub struct Metrics(Option); #[derive(Clone)] struct MetricsInner { - /// Number of chunks fetched. + /// Number of chunks fetched. /// /// Note: The failed count gets incremented, when we were not able to fetch the chunk at all. /// For times, where we failed downloading, but succeeded on the next try (with different /// backers), see `retries`. - fetched_chunks: CounterVec, + fetched_chunks: CounterVec, - /// Number of chunks served. + /// Number of chunks served. /// /// Note: Right now, `Succeeded` gets incremented whenever we were able to successfully respond /// to a chunk request. This includes `NoSuchChunk` responses. - served_chunks: CounterVec, + served_chunks: CounterVec, /// Number of times our first set of validators did not provide the needed chunk and we had to /// query further validators. From aadc80f9c1bca5e1f312e409a3502b06ddf94cdb Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 19:41:10 +0100 Subject: [PATCH 50/60] Use typed default timeout constant. --- node/network/protocol/src/request_response.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node/network/protocol/src/request_response.rs b/node/network/protocol/src/request_response.rs index 2160d1905cab..75eb33cfabb0 100644 --- a/node/network/protocol/src/request_response.rs +++ b/node/network/protocol/src/request_response.rs @@ -64,7 +64,7 @@ pub enum Protocol { /// When decreasing this value, take into account that the very first request might need to open a /// connection, which can be slow. If this causes problems, we should ensure connectivity via peer /// sets. -const DEFAULT_REQUEST_TIMEOUT: u64 = 3; +const DEFAULT_REQUEST_TIMEOUT: Duration = Duration::from_secs(3); impl Protocol { /// Get a configuration for a given Request response protocol. @@ -90,7 +90,7 @@ impl Protocol { max_request_size: 10_000, max_response_size: 1_000_000, // Also just some relative conservative guess: - request_timeout: Duration::from_secs(DEFAULT_REQUEST_TIMEOUT), + request_timeout: DEFAULT_REQUEST_TIMEOUT, inbound_queue: Some(tx), }, }; From e45f61c3e8f8d30f500a7ee0dd6fa7dbc6600993 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 20:06:52 +0100 Subject: [PATCH 51/60] Make channel size 0, as each sender gets one slot anyways. --- node/network/availability-distribution/src/requester/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/node/network/availability-distribution/src/requester/mod.rs b/node/network/availability-distribution/src/requester/mod.rs index a9b95b813cde..914a86ef7def 100644 --- a/node/network/availability-distribution/src/requester/mod.rs +++ b/node/network/availability-distribution/src/requester/mod.rs @@ -76,7 +76,9 @@ impl Requester { /// by advancing the stream. 
pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { // All we do is forwarding messages, no need to make this big. - let (tx, rx) = mpsc::channel(1); + // Each sender will get one slot, see + // [here](https://docs.rs/futures/0.3.13/futures/channel/mpsc/fn.channel.html). + let (tx, rx) = mpsc::channel(0); Requester { fetches: HashMap::new(), session_cache: SessionCache::new(keystore), From 43dfd1ccbc1632879e8cfbd736f3f7589e4169b6 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 21:41:01 +0100 Subject: [PATCH 52/60] Fix incorrect metrics initialization. --- node/network/availability-distribution/src/metrics.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node/network/availability-distribution/src/metrics.rs b/node/network/availability-distribution/src/metrics.rs index a70efd76071a..0ece5acb38ee 100644 --- a/node/network/availability-distribution/src/metrics.rs +++ b/node/network/availability-distribution/src/metrics.rs @@ -84,7 +84,7 @@ impl metrics::Metrics for Metrics { "Number of fetched chunks", "Total number of fetched chunks.", ), - &[FAILED, SUCCEEDED] + &["success"] )?, registry, )?, @@ -94,7 +94,7 @@ impl metrics::Metrics for Metrics { "Number of served chunks", "Total number of chunks served by this backer.", ), - &[FAILED, SUCCEEDED, NOT_FOUND] + &["success"] )?, registry, )?, From 53531576eaadbbe9364d4f81c7979e774f6fbe5f Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 21:49:12 +0100 Subject: [PATCH 53/60] Fix build after merge. --- runtime/parachains/src/inclusion.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/parachains/src/inclusion.rs b/runtime/parachains/src/inclusion.rs index aaafe0fbd939..364705a2c3b1 100644 --- a/runtime/parachains/src/inclusion.rs +++ b/runtime/parachains/src/inclusion.rs @@ -1256,7 +1256,7 @@ mod tests { let signed = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield, &signing_context, )); From ff944440e47ee1148c9860fdff2686f3f62c8ecf Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 22:59:31 +0100 Subject: [PATCH 54/60] More fixes. 
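
A note on the `mpsc::channel(0)` change in the requester above: in `futures::channel::mpsc` the effective capacity is `buffer + number_of_senders`, so every `Sender` clone keeps one guaranteed slot and a buffer of zero still lets each fetch task queue a single message. A minimal sketch of that behaviour, assuming only the `futures` 0.3 crate:

    use futures::channel::mpsc;

    fn main() {
        // Capacity is `buffer + number_of_senders`: with a buffer of zero our
        // single sender still owns one guaranteed slot.
        let (mut tx, _rx) = mpsc::channel::<u32>(0);

        // The guaranteed slot lets one message through immediately ...
        assert!(tx.try_send(1).is_ok());
        // ... while a second one is rejected until the receiver makes room.
        assert!(tx.try_send(2).unwrap_err().is_full());
    }

With one guaranteed slot per sender, an extra buffer slot only costs memory without changing behaviour, which is what the commit message above points out.
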
--- node/core/approval-voting/src/lib.rs | 2 +- node/network/approval-distribution/src/lib.rs | 2 +- node/network/availability-recovery/src/lib.rs | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/node/core/approval-voting/src/lib.rs b/node/core/approval-voting/src/lib.rs index 00e8ce46fce9..ddc46a4d1c60 100644 --- a/node/core/approval-voting/src/lib.rs +++ b/node/core/approval-voting/src/lib.rs @@ -846,7 +846,7 @@ fn check_and_import_assignment( tracing::trace!( target: LOG_TARGET, "Imported assignment from validator {} on candidate {:?}", - assignment.validator, + assignment.validator.0, (assigned_candidate_hash, candidate_entry.candidate_receipt().descriptor.para_id), ); diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 42de2eceeab6..4d98d58ba79c 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -164,7 +164,7 @@ impl State { self.handle_peer_view_change(ctx, peer_id, view).await; } NetworkBridgeEvent::OurViewChange(view) => { - for head in &view.heads { + for head in view.iter() { if !self.blocks.contains_key(head) { self.pending_known.entry(*head).or_default(); } diff --git a/node/network/availability-recovery/src/lib.rs b/node/network/availability-recovery/src/lib.rs index ab192d492542..a18fe1eda96d 100644 --- a/node/network/availability-recovery/src/lib.rs +++ b/node/network/availability-recovery/src/lib.rs @@ -852,7 +852,7 @@ async fn handle_network_update( chunk.is_some(), request_id, candidate_hash, - validator_index, + validator_index.0, ); // Whatever the result, issue an @@ -882,7 +882,7 @@ async fn handle_network_update( chunk.is_some(), request_id, awaited_chunk.candidate_hash, - awaited_chunk.validator_index, + awaited_chunk.validator_index.0, ); // If there exists an entry under r_id, remove it. @@ -1003,7 +1003,7 @@ async fn issue_request( request_id, peer_id, awaited_chunk.candidate_hash, - awaited_chunk.validator_index, + awaited_chunk.validator_index.0, ); protocol_v1::AvailabilityRecoveryMessage::RequestChunk( @@ -1019,7 +1019,7 @@ async fn issue_request( request_id, peer_id, awaited_data.candidate_hash, - awaited_data.validator_index, + awaited_data.validator_index.0, ); protocol_v1::AvailabilityRecoveryMessage::RequestFullData( From 6b71e549b9d228afdbdc4f8298f7ae8de6fcd8d3 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Wed, 24 Feb 2021 09:47:16 +0100 Subject: [PATCH 55/60] Hopefully valid metrics names. 
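
Strings like "Number of fetched chunks" are not legal Prometheus metric names (names must match `[a-zA-Z_:][a-zA-Z0-9_:]*`), so counters built with the old names are rejected by the client library; the follow-up commit additionally adopts the usual `parachain_` prefix and `_total` suffix for counters. A small sketch of the distinction, assuming the plain `prometheus` crate (the API re-exported by `polkadot_node_subsystem_util::metrics::prometheus`); the names and the exact point of failure here are illustrative:

    use prometheus::{CounterVec, IntCounter, Opts, Registry};

    fn main() {
        let registry = Registry::new();

        // A human-readable string with spaces is not a legal metric name;
        // building and registering such a counter fails.
        let bad = IntCounter::with_opts(Opts::new("Number of retries", "help"))
            .and_then(|c| registry.register(Box::new(c)));
        assert!(bad.is_err());

        // Conventional name: subsystem prefix plus `_total` suffix for a counter.
        let retries = IntCounter::with_opts(Opts::new(
            "parachain_fetch_retries_total",
            "Number of times fetching a chunk needed more backers.",
        ))
        .expect("valid metric name");
        registry
            .register(Box::new(retries.clone()))
            .expect("metric registers");
        retries.inc();
        assert_eq!(retries.get(), 1);

        // `success` is the label *name*; `succeeded`, `failed` and `not-found`
        // are values supplied when incrementing.
        let fetched = CounterVec::new(
            Opts::new("parachain_fetched_chunks_total", "Total number of fetched chunks."),
            &["success"],
        )
        .expect("valid metric name and label");
        registry
            .register(Box::new(fetched.clone()))
            .expect("metric registers");
        fetched.with_label_values(&["succeeded"]).inc();
    }

Label values, unlike metric and label names, are free-form strings, which is why a hyphenated value such as `not-found` is fine.
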
--- node/network/availability-distribution/src/metrics.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/node/network/availability-distribution/src/metrics.rs b/node/network/availability-distribution/src/metrics.rs index 0ece5acb38ee..bdd6b94ae8b5 100644 --- a/node/network/availability-distribution/src/metrics.rs +++ b/node/network/availability-distribution/src/metrics.rs @@ -81,7 +81,7 @@ impl metrics::Metrics for Metrics { fetched_chunks: prometheus::register( CounterVec::new( Opts::new( - "Number of fetched chunks", + "fetched_chunks", "Total number of fetched chunks.", ), &["success"] @@ -91,7 +91,7 @@ impl metrics::Metrics for Metrics { served_chunks: prometheus::register( CounterVec::new( Opts::new( - "Number of served chunks", + "served_chunks", "Total number of chunks served by this backer.", ), &["success"] @@ -100,7 +100,7 @@ impl metrics::Metrics for Metrics { )?, retries: prometheus::register( Counter::new( - "Number of retries", + "fetch_retries", "Number of times we did not succeed in fetching a chunk and needed to try more backers.", )?, registry, From 190adaad94ce3267c380510b3000b863ccd79d39 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 25 Feb 2021 11:04:48 +0100 Subject: [PATCH 56/60] Better metrics names. --- node/network/availability-distribution/src/metrics.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/node/network/availability-distribution/src/metrics.rs b/node/network/availability-distribution/src/metrics.rs index bdd6b94ae8b5..b7c41b04b83f 100644 --- a/node/network/availability-distribution/src/metrics.rs +++ b/node/network/availability-distribution/src/metrics.rs @@ -81,7 +81,7 @@ impl metrics::Metrics for Metrics { fetched_chunks: prometheus::register( CounterVec::new( Opts::new( - "fetched_chunks", + "parachain_fetched_chunks_total", "Total number of fetched chunks.", ), &["success"] @@ -91,7 +91,7 @@ impl metrics::Metrics for Metrics { served_chunks: prometheus::register( CounterVec::new( Opts::new( - "served_chunks", + "parachain_served_chunks_total", "Total number of chunks served by this backer.", ), &["success"] @@ -100,7 +100,7 @@ impl metrics::Metrics for Metrics { )?, retries: prometheus::register( Counter::new( - "fetch_retries", + "parachain_fetch_retries_total", "Number of times we did not succeed in fetching a chunk and needed to try more backers.", )?, registry, From 8901344d6f1f2fc83f33ad5293d3601830b4b96c Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 25 Feb 2021 23:18:50 +0100 Subject: [PATCH 57/60] Some tests that already work. 
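
The first test below, `task_can_be_canceled`, drives the future by hand: it pins the task, polls it exactly once with a waker that never wakes anything, and asserts that it is already `Ready`. A standalone sketch of that single-poll pattern, assuming only the `futures` 0.3 crate:

    use futures::task::{noop_waker, Context, Poll};
    use futures::{future, Future};

    fn main() {
        // A future with nothing left to do resolves on the very first poll,
        // even though the no-op waker could never reschedule it.
        let fut = future::ready(42u32);
        futures::pin_mut!(fut);

        let waker = noop_waker();
        let mut cx = Context::from_waker(&waker);
        assert_eq!(fut.poll(&mut cx), Poll::Ready(42));
    }
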
--- .../{fetch_task.rs => fetch_task/mod.rs} | 3 + .../src/requester/fetch_task/tests.rs | 166 ++++++++++++++++++ 2 files changed, 169 insertions(+) rename node/network/availability-distribution/src/requester/{fetch_task.rs => fetch_task/mod.rs} (99%) create mode 100644 node/network/availability-distribution/src/requester/fetch_task/tests.rs diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task/mod.rs similarity index 99% rename from node/network/availability-distribution/src/requester/fetch_task.rs rename to node/network/availability-distribution/src/requester/fetch_task/mod.rs index 10d0821a23d0..28d92ef8d2f0 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task/mod.rs @@ -42,6 +42,9 @@ use crate::{ metrics::{Metrics, SUCCEEDED, FAILED}, }; +#[cfg(test)] +mod tests; + /// Configuration for a `FetchTask` /// /// This exists to separate preparation of a `FetchTask` from actual starting it, which is diff --git a/node/network/availability-distribution/src/requester/fetch_task/tests.rs b/node/network/availability-distribution/src/requester/fetch_task/tests.rs new file mode 100644 index 000000000000..510c2b04e8ef --- /dev/null +++ b/node/network/availability-distribution/src/requester/fetch_task/tests.rs @@ -0,0 +1,166 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use std::collections::HashMap; + +use parity_scale_codec::Encode; + +use futures::channel::{mpsc, oneshot}; +use futures::{executor, Future, FutureExt, StreamExt, select}; +use futures::task::{Poll, Context, noop_waker}; + +use sc_network as network; +use sp_keyring::Sr25519Keyring; + +use polkadot_primitives::v1::{CandidateHash, ValidatorIndex}; +use polkadot_node_network_protocol::request_response::v1; +use polkadot_subsystem::messages::AllMessages; + +use crate::metrics::Metrics; +use super::*; + + +#[test] +fn task_can_be_canceled() { + let (task, _rx) = get_test_running_task(); + let (handle, kill) = oneshot::channel(); + std::mem::drop(handle); + let running_task = task.run(kill); + futures::pin_mut!(running_task); + let waker = noop_waker(); + let mut ctx = Context::from_waker(&waker); + assert!(running_task.poll(&mut ctx) == Poll::Ready(()), "Task is immediately finished"); +} + +/// Make sure task won't accept a chunk that has is invalid. 
+#[test] +fn task_does_not_accept_invalid_chunk() { + let (mut task, rx) = get_test_running_task(); + let validators = vec![Sr25519Keyring::Alice.public().into()]; + task.group = validators; + let test = TestRun { + chunk_responses: { + let mut m = HashMap::new(); + m.insert( + Sr25519Keyring::Alice.public().into(), + v1::ChunkResponse { + chunk: vec![1,2,3], + proof: vec![vec![9,8,2], vec![2,3,4]], + } + ); + m + }, + valid_chunks: HashSet::new(), + }; + test.run(task, rx); +} + +struct TestRun { + /// Response to deliver for a given validator index. + /// None means, answer with NetworkError. + chunk_responses: HashMap, + /// Set of chunks that should be considered valid: + valid_chunks: HashSet>, +} + + +impl TestRun { + fn run(self, task: RunningTask, rx: mpsc::Receiver) { + let mut rx = rx.fuse(); + let task = task.run_inner().fuse(); + futures::pin_mut!(task); + executor::block_on(async { + let mut end_ok = false; + loop { + let msg = select!( + from_task = rx.next() => { + match from_task { + Some(msg) => msg, + None => break, + } + }, + () = task => + break, + ); + match msg { + FromFetchTask::Concluded(_) => break, + FromFetchTask::Message(msg) => + end_ok = self.handle_message(msg).await, + } + } + if !end_ok { + panic!("Task ended prematurely (failed to store valid chunk)!"); + } + }); + } + + /// Returns true, if after processing of the given message it would be ok for the stream to + /// end. + async fn handle_message(&self, msg: AllMessages) -> bool { + match msg { + AllMessages::NetworkBridge(NetworkBridgeMessage::SendRequests(reqs)) => { + let mut valid_responses = 0; + for req in reqs { + let req = match req { + Requests::AvailabilityFetching(req) => req, + }; + let response = self.chunk_responses.get(&req.peer) + .ok_or(network::RequestFailure::Refused); + + if let Ok(resp) = &response { + if self.valid_chunks.contains(&resp.chunk) { + valid_responses += 1; + } + } + req.pending_response.send(response.map(Encode::encode)) + .expect("Sending response should succeed"); + } + return (valid_responses == 0) && self.valid_chunks.is_empty() + } + AllMessages::AvailabilityStore( + AvailabilityStoreMessage::StoreChunk { chunk, .. } + ) => { + assert!(self.valid_chunks.contains(&chunk.chunk)); + return true + } + _ => { + tracing::debug!(target: LOG_TARGET, "Unexpected message"); + return false + } + } + } +} + +fn get_test_running_task() -> (RunningTask, mpsc::Receiver) { + let (tx,rx) = mpsc::channel(0); + + ( + RunningTask { + session_index: 0, + group_index: GroupIndex(0), + group: Vec::new(), + request: AvailabilityFetchingRequest { + candidate_hash: CandidateHash([43u8;32].into()), + index: ValidatorIndex(0), + }, + erasure_root: Hash::repeat_byte(99), + relay_parent: Hash::repeat_byte(71), + sender: tx, + metrics: Metrics::new_dummy(), + }, + rx + ) +} From 1d29b5cc5997905866272d8080b39fb598c0e23f Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 25 Feb 2021 23:19:21 +0100 Subject: [PATCH 58/60] Slightly better docs. --- node/subsystem/src/messages.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/subsystem/src/messages.rs b/node/subsystem/src/messages.rs index 629b25df881e..85ebb2099327 100644 --- a/node/subsystem/src/messages.rs +++ b/node/subsystem/src/messages.rs @@ -269,7 +269,7 @@ impl NetworkBridgeMessage { /// Availability Distribution Message. #[derive(Debug, derive_more::From)] pub enum AvailabilityDistributionMessage { - /// Incoming request for an availability chunk. + /// Incoming network request for an availability chunk. 
AvailabilityFetchingRequest(IncomingRequest) } From 83ff6668396f5b9458ef4ef8a9a61a171768b4c4 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 26 Feb 2021 14:19:12 +0100 Subject: [PATCH 59/60] Some more tests. --- .../availability-distribution/src/metrics.rs | 5 + .../src/requester/fetch_task/mod.rs | 2 +- .../src/requester/fetch_task/tests.rs | 167 +++++++++++++++++- 3 files changed, 164 insertions(+), 10 deletions(-) diff --git a/node/network/availability-distribution/src/metrics.rs b/node/network/availability-distribution/src/metrics.rs index b7c41b04b83f..c07500996fa2 100644 --- a/node/network/availability-distribution/src/metrics.rs +++ b/node/network/availability-distribution/src/metrics.rs @@ -53,6 +53,11 @@ struct MetricsInner { } impl Metrics { + /// Create new dummy metrics, not reporting anything. + pub fn new_dummy() -> Self { + Metrics(None) + } + /// Increment counter on fetched labels. pub fn on_fetch(&self, label: &'static str) { if let Some(metrics) = &self.0 { diff --git a/node/network/availability-distribution/src/requester/fetch_task/mod.rs b/node/network/availability-distribution/src/requester/fetch_task/mod.rs index 28d92ef8d2f0..3e187f9502e8 100644 --- a/node/network/availability-distribution/src/requester/fetch_task/mod.rs +++ b/node/network/availability-distribution/src/requester/fetch_task/mod.rs @@ -358,7 +358,7 @@ impl RunningTask { match branch_hash(&self.erasure_root, &chunk.proof, chunk.index.0 as usize) { Ok(hash) => hash, Err(e) => { - tracing::trace!( + tracing::warn!( target: LOG_TARGET, candidate_hash = ?self.request.candidate_hash, origin = ?validator, diff --git a/node/network/availability-distribution/src/requester/fetch_task/tests.rs b/node/network/availability-distribution/src/requester/fetch_task/tests.rs index 510c2b04e8ef..b4254850563c 100644 --- a/node/network/availability-distribution/src/requester/fetch_task/tests.rs +++ b/node/network/availability-distribution/src/requester/fetch_task/tests.rs @@ -15,6 +15,7 @@ // along with Polkadot. If not, see . 
use std::collections::HashMap; +use std::sync::Arc; use parity_scale_codec::Encode; @@ -22,17 +23,17 @@ use futures::channel::{mpsc, oneshot}; use futures::{executor, Future, FutureExt, StreamExt, select}; use futures::task::{Poll, Context, noop_waker}; +use polkadot_erasure_coding::{obtain_chunks_v1 as obtain_chunks, branches}; use sc_network as network; use sp_keyring::Sr25519Keyring; -use polkadot_primitives::v1::{CandidateHash, ValidatorIndex}; +use polkadot_primitives::v1::{AvailableData, BlockData, CandidateHash, HeadData, PersistedValidationData, PoV, ValidatorIndex}; use polkadot_node_network_protocol::request_response::v1; use polkadot_subsystem::messages::AllMessages; use crate::metrics::Metrics; use super::*; - #[test] fn task_can_be_canceled() { let (task, _rx) = get_test_running_task(); @@ -56,10 +57,12 @@ fn task_does_not_accept_invalid_chunk() { let mut m = HashMap::new(); m.insert( Sr25519Keyring::Alice.public().into(), - v1::ChunkResponse { - chunk: vec![1,2,3], - proof: vec![vec![9,8,2], vec![2,3,4]], - } + AvailabilityFetchingResponse::Chunk( + v1::ChunkResponse { + chunk: vec![1,2,3], + proof: vec![vec![9,8,2], vec![2,3,4]], + } + ) ); m }, @@ -68,10 +71,126 @@ fn task_does_not_accept_invalid_chunk() { test.run(task, rx); } +#[test] +fn task_stores_valid_chunk() { + let (mut task, rx) = get_test_running_task(); + let (root_hash, chunk) = get_valid_chunk_data(); + task.erasure_root = root_hash; + task.request.index = chunk.index; + + let validators = vec![Sr25519Keyring::Alice.public().into()]; + task.group = validators; + + let test = TestRun { + chunk_responses: { + let mut m = HashMap::new(); + m.insert( + Sr25519Keyring::Alice.public().into(), + AvailabilityFetchingResponse::Chunk( + v1::ChunkResponse { + chunk: chunk.chunk.clone(), + proof: chunk.proof, + } + ) + ); + m + }, + valid_chunks: { + let mut s = HashSet::new(); + s.insert(chunk.chunk); + s + }, + }; + test.run(task, rx); +} + +#[test] +fn task_does_not_accept_wrongly_indexed_chunk() { + let (mut task, rx) = get_test_running_task(); + let (root_hash, chunk) = get_valid_chunk_data(); + task.erasure_root = root_hash; + task.request.index = ValidatorIndex(chunk.index.0+1); + + let validators = vec![Sr25519Keyring::Alice.public().into()]; + task.group = validators; + + let test = TestRun { + chunk_responses: { + let mut m = HashMap::new(); + m.insert( + Sr25519Keyring::Alice.public().into(), + AvailabilityFetchingResponse::Chunk( + v1::ChunkResponse { + chunk: chunk.chunk.clone(), + proof: chunk.proof, + } + ) + ); + m + }, + valid_chunks: HashSet::new(), + }; + test.run(task, rx); +} + +/// Task stores chunk, if there is at least one validator having a valid chunk. +#[test] +fn task_stores_valid_chunk_if_there_is_one() { + let (mut task, rx) = get_test_running_task(); + let (root_hash, chunk) = get_valid_chunk_data(); + task.erasure_root = root_hash; + task.request.index = chunk.index; + + let validators = [ + // Only Alice has valid chunk - should succeed, even though she is tried last. 
+ Sr25519Keyring::Alice, + Sr25519Keyring::Bob, Sr25519Keyring::Charlie, + Sr25519Keyring::Dave, Sr25519Keyring::Eve, + ] + .iter().map(|v| v.public().into()).collect::>(); + task.group = validators; + + let test = TestRun { + chunk_responses: { + let mut m = HashMap::new(); + m.insert( + Sr25519Keyring::Alice.public().into(), + AvailabilityFetchingResponse::Chunk( + v1::ChunkResponse { + chunk: chunk.chunk.clone(), + proof: chunk.proof, + } + ) + ); + m.insert( + Sr25519Keyring::Bob.public().into(), + AvailabilityFetchingResponse::NoSuchChunk + ); + m.insert( + Sr25519Keyring::Charlie.public().into(), + AvailabilityFetchingResponse::Chunk( + v1::ChunkResponse { + chunk: vec![1,2,3], + proof: vec![vec![9,8,2], vec![2,3,4]], + } + ) + ); + + m + }, + valid_chunks: { + let mut s = HashSet::new(); + s.insert(chunk.chunk); + s + }, + }; + test.run(task, rx); +} + struct TestRun { /// Response to deliver for a given validator index. /// None means, answer with NetworkError. - chunk_responses: HashMap, + chunk_responses: HashMap, /// Set of chunks that should be considered valid: valid_chunks: HashSet>, } @@ -79,6 +198,7 @@ struct TestRun { impl TestRun { fn run(self, task: RunningTask, rx: mpsc::Receiver) { + sp_tracing::try_init_simple(); let mut rx = rx.fuse(); let task = task.run_inner().fuse(); futures::pin_mut!(task); @@ -120,7 +240,7 @@ impl TestRun { let response = self.chunk_responses.get(&req.peer) .ok_or(network::RequestFailure::Refused); - if let Ok(resp) = &response { + if let Ok(AvailabilityFetchingResponse::Chunk(resp)) = &response { if self.valid_chunks.contains(&resp.chunk) { valid_responses += 1; } @@ -131,9 +251,10 @@ impl TestRun { return (valid_responses == 0) && self.valid_chunks.is_empty() } AllMessages::AvailabilityStore( - AvailabilityStoreMessage::StoreChunk { chunk, .. } + AvailabilityStoreMessage::StoreChunk { chunk, tx, .. } ) => { assert!(self.valid_chunks.contains(&chunk.chunk)); + tx.send(Ok(())).expect("Answering fetching task should work"); return true } _ => { @@ -144,6 +265,7 @@ impl TestRun { } } +/// Get a `RunningTask` filled with dummy values. fn get_test_running_task() -> (RunningTask, mpsc::Receiver) { let (tx,rx) = mpsc::channel(0); @@ -164,3 +286,30 @@ fn get_test_running_task() -> (RunningTask, mpsc::Receiver) { rx ) } + +fn get_valid_chunk_data() -> (Hash, ErasureChunk) { + let fake_validator_count = 10; + let persisted = PersistedValidationData { + parent_head: HeadData(vec![7, 8, 9]), + relay_parent_number: Default::default(), + max_pov_size: 1024, + relay_parent_storage_root: Default::default(), + }; + let pov_block = PoV { + block_data: BlockData(vec![45, 46, 47]), + }; + let available_data = AvailableData { + validation_data: persisted, pov: Arc::new(pov_block), + }; + let chunks = obtain_chunks(fake_validator_count, &available_data).unwrap(); + let branches = branches(chunks.as_ref()); + let root = branches.root(); + let chunk = branches.enumerate() + .map(|(index, (proof, chunk))| ErasureChunk { + chunk: chunk.to_vec(), + index: ValidatorIndex(index as _), + proof, + }) + .next().expect("There really should be 10 chunks."); + (root, chunk) +} From a0e01ec1f91e136856c56a558ff63949f098d3af Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 26 Feb 2021 15:08:02 +0100 Subject: [PATCH 60/60] Fix network bridge test. 
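
For context on the fetch-task tests above: the requester pops backers off the end of the group, and `report_bad` puts reported validators at the front so they are tried last on the next fetch, which is why Alice, listed first in `task_stores_valid_chunk_if_there_is_one`, is deliberately the last one asked. A simplified, self-contained sketch of that re-ordering, with plain strings standing in for validator ids:

    /// Simplified version of the re-ordering: drop the reported validators
    /// from wherever they are and put them at the front, so `pop()` reaches
    /// them last.
    fn report_bad(group: &mut Vec<&'static str>, bad: &[&'static str]) {
        group.retain(|v| !bad.contains(v));
        let mut reordered = bad.to_vec();
        reordered.extend(group.iter().copied());
        *group = reordered;
    }

    fn main() {
        let mut group = vec!["alice", "bob", "charlie"];
        // Fetching pops from the back, so charlie would be asked first.
        assert_eq!(group.last(), Some(&"charlie"));

        // charlie and bob failed to provide the chunk; move them to the front.
        report_bad(&mut group, &["charlie", "bob"]);
        assert_eq!(group, vec!["charlie", "bob", "alice"]);
        // On the next fetch, alice (now at the back) is asked first.
        assert_eq!(group.last(), Some(&"alice"));
    }
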
--- node/network/bridge/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/network/bridge/src/lib.rs b/node/network/bridge/src/lib.rs index c50cf49c2ce4..25baae8b4671 100644 --- a/node/network/bridge/src/lib.rs +++ b/node/network/bridge/src/lib.rs @@ -1524,7 +1524,7 @@ mod tests { fn spread_event_to_subsystems_is_up_to_date() { // Number of subsystems expected to be interested in a network event, // and hence the network event broadcasted to. - const EXPECTED_COUNT: usize = 6; + const EXPECTED_COUNT: usize = 5; let mut cnt = 0_usize; for msg in AllMessages::dispatch_iter(NetworkBridgeEvent::PeerDisconnected(PeerId::random())) {