From 6f2e0e919ea4418273f7a5400d57cb90a3bb987d Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 4 Feb 2021 10:05:07 +0100 Subject: [PATCH 01/60] WIP --- .../availability-distribution/src/lib.rs | 1195 +---------------- primitives/src/v0.rs | 3 +- 2 files changed, 67 insertions(+), 1131 deletions(-) diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index a35be6a3a219..9cd1f81337ab 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -1,4 +1,5 @@ -// Copyright 2020 Parity Technologies (UK) Ltd. + +// Copyright 2021 Parity Technologies (UK) Ltd. // This file is part of Polkadot. // Polkadot is free software: you can redistribute it and/or modify @@ -14,885 +15,91 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -//! The availability distribution -//! -//! Transforms `AvailableData` into erasure chunks, which are distributed to peers -//! who are interested in the relevant candidates. -//! Gossip messages received from other peers are verified and gossiped to interested -//! peers. Verified in this context means, the erasure chunks contained merkle proof -//! is checked. - -#![deny(unused_crate_dependencies, unused_qualifications)] - -use parity_scale_codec::{Decode, Encode}; -use futures::{channel::oneshot, FutureExt, TryFutureExt}; - -use sp_core::crypto::Public; -use sp_keystore::{CryptoStore, SyncCryptoStorePtr}; - -use polkadot_erasure_coding::branch_hash; -use polkadot_node_network_protocol::{ - v1 as protocol_v1, PeerId, ReputationChange as Rep, View, OurView, -}; -use polkadot_node_subsystem_util::metrics::{self, prometheus}; -use polkadot_primitives::v1::{ - BlakeTwo256, CoreState, ErasureChunk, Hash, HashT, - SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, CandidateHash, - CandidateDescriptor, -}; -use polkadot_subsystem::messages::{ - AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage, - NetworkBridgeMessage, RuntimeApiMessage, RuntimeApiRequest, NetworkBridgeEvent -}; -use polkadot_subsystem::{ - jaeger, errors::{ChainApiError, RuntimeApiError}, PerLeafSpan, - ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, -}; -use std::collections::{HashMap, HashSet}; -use std::collections::hash_map::Entry; -use std::iter; -use thiserror::Error; - -#[cfg(test)] -mod tests; - -const LOG_TARGET: &'static str = "availability_distribution"; - -#[derive(Debug, Error)] -enum Error { - #[error("Response channel to obtain StoreChunk failed")] - StoreChunkResponseChannel(#[source] oneshot::Canceled), - - #[error("Response channel to obtain QueryChunk failed")] - QueryChunkResponseChannel(#[source] oneshot::Canceled), - - #[error("Response channel to obtain QueryAncestors failed")] - QueryAncestorsResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain QueryAncestors failed")] - QueryAncestors(#[source] ChainApiError), - - #[error("Response channel to obtain QuerySession failed")] - QuerySessionResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain QuerySession failed")] - QuerySession(#[source] RuntimeApiError), - - #[error("Response channel to obtain QueryValidators failed")] - QueryValidatorsResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain QueryValidators failed")] - QueryValidators(#[source] RuntimeApiError), - - 
#[error("Response channel to obtain AvailabilityCores failed")] - AvailabilityCoresResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain AvailabilityCores failed")] - AvailabilityCores(#[source] RuntimeApiError), - - #[error("Response channel to obtain AvailabilityCores failed")] - QueryAvailabilityResponseChannel(#[source] oneshot::Canceled), - - #[error("Receive channel closed")] - IncomingMessageChannel(#[source] SubsystemError), -} - -type Result = std::result::Result; - -const COST_MERKLE_PROOF_INVALID: Rep = Rep::new(-100, "Merkle proof was invalid"); -const COST_NOT_A_LIVE_CANDIDATE: Rep = Rep::new(-51, "Candidate is not live"); -const COST_PEER_DUPLICATE_MESSAGE: Rep = Rep::new(-500, "Peer sent identical messages"); -const BENEFIT_VALID_MESSAGE_FIRST: Rep = Rep::new(15, "Valid message with new information"); -const BENEFIT_VALID_MESSAGE: Rep = Rep::new(10, "Valid message"); - -/// Checked signed availability bitfield that is distributed -/// to other peers. -#[derive(Encode, Decode, Debug, Clone, PartialEq, Eq, Hash)] -pub struct AvailabilityGossipMessage { - /// Anchor hash of the candidate the `ErasureChunk` is associated to. - pub candidate_hash: CandidateHash, - /// The erasure chunk, a encoded information part of `AvailabilityData`. - pub erasure_chunk: ErasureChunk, -} - -impl From for protocol_v1::AvailabilityDistributionMessage { - fn from(message: AvailabilityGossipMessage) -> Self { - Self::Chunk(message.candidate_hash, message.erasure_chunk) - } -} - -/// Data used to track information of peers and relay parents the -/// overseer ordered us to work on. -#[derive(Debug, Default)] -struct ProtocolState { - /// Track all active peers and their views - /// to determine what is relevant to them. - peer_views: HashMap, - - /// Our own view. - view: OurView, - - /// Caches a mapping of relay parents or ancestor to live candidate hashes. - /// Allows fast intersection of live candidates with views and consecutive unioning. - /// Maps relay parent / ancestor -> candidate hashes. - live_under: HashMap>, - - /// Track things needed to start and stop work on a particular relay parent. - per_relay_parent: HashMap, - - /// Track data that is specific to a candidate. - per_candidate: HashMap, +/// The bitfield distribution subsystem. +pub struct AvailabilityDistributionSubsystem { + /// Pointer to a keystore, which is required for determining this nodes validator index. + keystore: SyncCryptoStorePtr, + /// Prometheus metrics. + metrics: Metrics, } +/// Metadata about a candidate that is part of the live_candidates set. +/// +/// Those which were not present in a cache are "fresh" and have their candidate descriptor attached. This +/// information is propagated to the higher level where it can be used to create data entries. Cached candidates +/// already have entries associated with them, and thus don't need this metadata to be fetched. #[derive(Debug)] -struct PerCandidate { - /// A Candidate and a set of known erasure chunks in form of messages to be gossiped / distributed if the peer view wants that. - /// This is _across_ peers and not specific to a particular one. - /// candidate hash + erasure chunk index -> gossip message - message_vault: HashMap, - - /// Track received erasure chunk indices per peer. - received_messages: HashMap>, - - /// Track sent erasure chunk indices per peer. - sent_messages: HashMap>, - - /// The set of validators. - validators: Vec, - - /// If this node is a validator, note the index in the validator set. 
- validator_index: Option, - - /// The descriptor of this candidate. - descriptor: CandidateDescriptor, - - /// The set of relay chain blocks this appears to be live in. - live_in: HashSet, - - /// A Jaeger span relating to this candidate. - span: jaeger::JaegerSpan, -} - -impl PerCandidate { - /// Returns `true` iff the given `validator_index` is required by the given `peer`. - fn message_required_by_peer(&self, peer: &PeerId, validator_index: ValidatorIndex) -> bool { - self.received_messages.get(peer).map(|v| !v.contains(&validator_index)).unwrap_or(true) - && self.sent_messages.get(peer).map(|v| !v.contains(&validator_index)).unwrap_or(true) - } - - /// Add a chunk to the message vault. Overwrites anything that was already present. - fn add_message(&mut self, chunk_index: u32, message: AvailabilityGossipMessage) { - let _ = self.message_vault.insert(chunk_index, message); - } - - /// Clean up the span if we've got our own chunk. - fn drop_span_after_own_availability(&mut self) { - if let Some(validator_index) = self.validator_index { - if self.message_vault.contains_key(&validator_index) { - self.span = jaeger::JaegerSpan::Disabled; - } - } - } +enum FetchedLiveCandidate { + Cached, + Fresh(CandidateDescriptor), } -#[derive(Debug)] -struct PerRelayParent { - /// Set of `K` ancestors for this relay parent. - ancestors: Vec, - /// Live candidates, according to this relay parent. - live_candidates: HashSet, - /// The span that belongs to this relay parent. - span: PerLeafSpan, -} +struct ProtocolState { + /// Candidates we need to fetch our chunk for. + chunks_to_fetch: HashMap, -impl ProtocolState { - /// Unionize all live candidate hashes of the given relay parents and their recent - /// ancestors. + /// Localized information about sessions we are currently interested in. /// - /// Ignores all non existent relay parents, so this can be used directly with a peers view. - /// Returns a set of candidate hashes. - #[tracing::instrument(level = "trace", skip(relay_parents), fields(subsystem = LOG_TARGET))] - fn cached_live_candidates_unioned<'a>( - &'a self, - relay_parents: impl IntoIterator + 'a, - ) -> HashSet { - cached_live_candidates_unioned( - &self.per_relay_parent, - relay_parents - ) - } - - #[tracing::instrument(level = "trace", skip(candidates, span), fields(subsystem = LOG_TARGET))] - fn add_relay_parent( - &mut self, - relay_parent: Hash, - validators: Vec, - validator_index: Option, - candidates: HashMap, - ancestors: Vec, - span: PerLeafSpan, - ) { - let per_relay_parent = self.per_relay_parent.entry(relay_parent).or_insert_with(|| PerRelayParent { - span, - ancestors, - live_candidates: candidates.keys().cloned().collect(), - }); - - // register the relation of relay_parent to candidate.. - for (receipt_hash, fetched) in candidates { - let candidate_entry = match self.per_candidate.entry(receipt_hash) { - Entry::Occupied(e) => e.into_mut(), - Entry::Vacant(e) => { - if let FetchedLiveCandidate::Fresh(descriptor) = fetched { - e.insert(PerCandidate { - message_vault: HashMap::new(), - received_messages: HashMap::new(), - sent_messages: HashMap::new(), - validators: validators.clone(), - validator_index, - descriptor, - live_in: HashSet::new(), - span: if validator_index.is_some() { - jaeger::candidate_hash_span(&receipt_hash, "pending-availability") - } else { - jaeger::JaegerSpan::Disabled - }, - }) - } else { - tracing::warn!(target: LOG_TARGET, "No `per_candidate` but not fresh. 
logic error"); - continue; - } - } - }; + /// This is usually the current one and at session boundaries also the last one. + session_infos: HashMap, - // Create some span that will make it able to switch between the candidate and relay parent span. - let mut span = per_relay_parent.span.child("live-candidate"); - span.add_string_tag("candidate-hash", &format!("{:?}", receipt_hash)); - - candidate_entry.span.add_follows_from(&span); - candidate_entry.live_in.insert(relay_parent); - } - } - - #[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))] - fn remove_relay_parent(&mut self, relay_parent: &Hash) { - if let Some(per_relay_parent) = self.per_relay_parent.remove(relay_parent) { - for candidate_hash in per_relay_parent.live_candidates { - // Prune the candidate if this was the last member of our view - // to consider it live (including its ancestors). - if let Entry::Occupied(mut occ) = self.per_candidate.entry(candidate_hash) { - occ.get_mut().live_in.remove(relay_parent); - if occ.get().live_in.is_empty() { - occ.remove(); - } - } - } - } - } - - /// Removes all entries from live_under which aren't referenced in the ancestry of - /// one of our live relay-chain heads. - fn clean_up_live_under_cache(&mut self) { - let extended_view: HashSet<_> = self.per_relay_parent.iter() - .map(|(r_hash, v)| v.ancestors.iter().cloned().chain(iter::once(*r_hash))) - .flatten() - .collect(); - - self.live_under.retain(|ancestor_hash, _| extended_view.contains(ancestor_hash)); - } } -fn cached_live_candidates_unioned<'a>( - per_relay_parent: &'a HashMap, - relay_parents: impl IntoIterator + 'a, -) -> HashSet { - relay_parents - .into_iter() - .filter_map(|r| per_relay_parent.get(r)) - .map(|per_relay_parent| per_relay_parent.live_candidates.iter().cloned()) - .flatten() - .collect() -} - -/// Deal with network bridge updates and track what needs to be tracked -/// which depends on the message type received. -#[tracing::instrument(level = "trace", skip(ctx, keystore, metrics), fields(subsystem = LOG_TARGET))] -async fn handle_network_msg( - ctx: &mut Context, - keystore: &SyncCryptoStorePtr, - state: &mut ProtocolState, - metrics: &Metrics, - bridge_message: NetworkBridgeEvent, -) -> Result<()> -where - Context: SubsystemContext, -{ - match bridge_message { - NetworkBridgeEvent::PeerConnected(peerid, _role) => { - // insert if none already present - state.peer_views.entry(peerid).or_default(); - } - NetworkBridgeEvent::PeerDisconnected(peerid) => { - // get rid of superfluous data - state.peer_views.remove(&peerid); - } - NetworkBridgeEvent::PeerViewChange(peerid, view) => { - handle_peer_view_change(ctx, state, peerid, view, metrics).await; - } - NetworkBridgeEvent::OurViewChange(view) => { - handle_our_view_change(ctx, keystore, state, view, metrics).await?; - } - NetworkBridgeEvent::PeerMessage(remote, msg) => { - let gossiped_availability = match msg { - protocol_v1::AvailabilityDistributionMessage::Chunk(candidate_hash, chunk) => { - AvailabilityGossipMessage { - candidate_hash, - erasure_chunk: chunk, - } - } - }; - - process_incoming_peer_message(ctx, state, remote, gossiped_availability, metrics) - .await?; - } - } - Ok(()) -} - -/// Handle the changes necessary when our view changes. 
-#[tracing::instrument(level = "trace", skip(ctx, keystore, metrics), fields(subsystem = LOG_TARGET))] -async fn handle_our_view_change( - ctx: &mut Context, - keystore: &SyncCryptoStorePtr, - state: &mut ProtocolState, - view: OurView, - metrics: &Metrics, -) -> Result<()> -where - Context: SubsystemContext, -{ - let _timer = metrics.time_handle_our_view_change(); - - let old_view = std::mem::replace(&mut state.view, view); - - // needed due to borrow rules - let view = state.view.clone(); - - // add all the relay parents and fill the cache - for (added, span) in view.span_per_head().iter().filter(|v| !old_view.contains(&v.0)) { - let span = PerLeafSpan::new(span.clone(), "availability-distribution"); - - let validators = query_validators(ctx, *added).await?; - let validator_index = obtain_our_validator_index(&validators, keystore.clone()).await; - let (candidates, ancestors) - = query_live_candidates(ctx, &mut state.live_under, *added).await?; - - state.add_relay_parent( - *added, - validators, - validator_index, - candidates, - ancestors, - span, - ); - } - - // handle all candidates - let mut messages_out = Vec::new(); - for candidate_hash in state.cached_live_candidates_unioned(view.difference(&old_view)) { - // If we are not a validator for this candidate, let's skip it. - match state.per_candidate.get(&candidate_hash) { - None => continue, - Some(c) if c.validator_index.is_none() => continue, - Some(_) => {}, - }; - - // check if the availability is present in the store exists - if !query_data_availability(ctx, candidate_hash).await? { - continue; - } - - // obtain interested peers in the candidate hash - let peers: Vec = state - .peer_views - .clone() - .into_iter() - .filter(|(_peer, view)| { - // collect all direct interests of a peer w/o ancestors - state - .cached_live_candidates_unioned(view.heads.iter()) - .contains(&candidate_hash) - }) - .map(|(peer, _view)| peer.clone()) - .collect(); - - let per_candidate = state.per_candidate.get_mut(&candidate_hash) - .expect("existence checked above; qed"); - - let validator_count = per_candidate.validators.len(); - - // distribute all erasure messages to interested peers - for chunk_index in 0u32..(validator_count as u32) { - let _span = { - let mut span = per_candidate.span.child("load-and-distribute"); - span.add_string_tag("chunk-index", &format!("{}", chunk_index)); - span - }; - let message = if let Some(message) = per_candidate.message_vault.get(&chunk_index) { - tracing::trace!( - target: LOG_TARGET, - %chunk_index, - ?candidate_hash, - "Retrieved chunk from message vault", - ); - message.clone() - } else if let Some(erasure_chunk) = query_chunk(ctx, candidate_hash, chunk_index as ValidatorIndex).await? 
{ - tracing::trace!( - target: LOG_TARGET, - %chunk_index, - ?candidate_hash, - "Retrieved chunk from availability storage", - ); - - let msg = AvailabilityGossipMessage { - candidate_hash, - erasure_chunk, - }; - - per_candidate.add_message(chunk_index, msg.clone()); - - msg - } else { - tracing::error!( - target: LOG_TARGET, - %chunk_index, - ?candidate_hash, - "Availability store reported that we have the availability data, but we could not retrieve a chunk of it!", - ); - continue; - }; - - debug_assert_eq!(message.erasure_chunk.index, chunk_index); - - let peers = peers - .iter() - .filter(|peer| per_candidate.message_required_by_peer(peer, chunk_index)) - .cloned() - .collect::>(); - - add_tracked_messages_to_batch(&mut messages_out, per_candidate, metrics, peers, iter::once(message)); - } - - // traces are better if we wait until the loop is done to drop. - per_candidate.drop_span_after_own_availability(); - } - - // send all batched messages out. - send_batch_to_network(ctx, messages_out).await; - - // cleanup the removed relay parents and their states - old_view.difference(&view).for_each(|r| state.remove_relay_parent(r)); - state.clean_up_live_under_cache(); - - Ok(()) -} - -// After this function is invoked, the state reflects the messages as having been sent to a peer. -#[tracing::instrument(level = "trace", skip(batch, metrics, message_iter), fields(subsystem = LOG_TARGET))] -fn add_tracked_messages_to_batch( - batch: &mut Vec<(Vec, protocol_v1::ValidationProtocol)>, - per_candidate: &mut PerCandidate, - metrics: &Metrics, - peers: Vec, - message_iter: impl IntoIterator, -) { - for message in message_iter { - for peer in peers.iter() { - per_candidate - .sent_messages - .entry(peer.clone()) - .or_default() - .insert(message.erasure_chunk.index); - } - - if !peers.is_empty() { - batch.push(( - peers.clone(), - protocol_v1::ValidationProtocol::AvailabilityDistribution(message.into()), - )); - - metrics.on_chunk_distributed(); - } - } -} - -async fn send_batch_to_network( - ctx: &mut impl SubsystemContext, - batch: Vec<(Vec, protocol_v1::ValidationProtocol)>, -) { - if !batch.is_empty() { - ctx.send_message(NetworkBridgeMessage::SendValidationMessages(batch).into()).await - } +/// Localized session information, tailored for the needs of availability distribution. +struct SessionInfo { + /// For each core we maintain a randomized list of corresponding validators. + /// + /// This is so we can query them for chunks, trying them in order. As each validator will + /// have a randomized ordering, we should get good load balancing. + validator_groups: Vec>, } -// Send the difference between two views which were not sent -// to that particular peer. -#[tracing::instrument(level = "trace", skip(ctx, metrics), fields(subsystem = LOG_TARGET))] -async fn handle_peer_view_change( - ctx: &mut Context, - state: &mut ProtocolState, - origin: PeerId, - view: View, - metrics: &Metrics, -) -where - Context: SubsystemContext, -{ - let current = state.peer_views.entry(origin.clone()).or_default(); - - let added: Vec = view.difference(&*current).cloned().collect(); - - *current = view; - - if added.is_empty() { - return - } - - // only contains the intersection of what we are interested and - // the union of all relay parent's candidates. - let added_candidates = state.cached_live_candidates_unioned(added.iter()); - - // Send all messages we've seen before and the peer is now interested in. 
- let mut batch = Vec::new(); - for candidate_hash in added_candidates { - let per_candidate = match state.per_candidate.get_mut(&candidate_hash) { - Some(p) => p, - None => continue, - }; - - // obtain the relevant chunk indices not sent yet - let messages = ((0 as ValidatorIndex)..(per_candidate.validators.len() as ValidatorIndex)) - .into_iter() - .filter_map(|erasure_chunk_index: ValidatorIndex| { - // try to pick up the message from the message vault - // so we send as much as we have - per_candidate - .message_vault - .get(&erasure_chunk_index) - .filter(|_| per_candidate.message_required_by_peer(&origin, erasure_chunk_index)) - }) - .cloned() - .collect::>(); - - add_tracked_messages_to_batch(&mut batch, per_candidate, metrics, vec![origin.clone()], messages); - } - - send_batch_to_network(ctx, batch).await; +struct ChunkFetchingInfo { + descriptor: CandidateDescriptor, + /// Validators that backed the candidate and hopefully have our chunk. + backing_group: Vec, } -/// Obtain the first key which has a signing key. -/// Returns the index within the validator set as `ValidatorIndex`, if there exists one, -/// otherwise, `None` is returned. -async fn obtain_our_validator_index( - validators: &[ValidatorId], - keystore: SyncCryptoStorePtr, -) -> Option { - for (idx, validator) in validators.iter().enumerate() { - if CryptoStore::has_keys( - &*keystore, - &[(validator.to_raw_vec(), PARACHAIN_KEY_TYPE_ID)], - ) - .await - { - return Some(idx as ValidatorIndex); - } - } - None +fn run() { + /// Get current heads + /// For each chunk/slot, update randomized list of validators to query on session bundaries. + /// Fetch pending availability candidates and add them to `chunks_to_fetch`. } -/// Handle an incoming message from a peer. -#[tracing::instrument(level = "trace", skip(ctx, metrics), fields(subsystem = LOG_TARGET))] -async fn process_incoming_peer_message( +/// Obtain all live candidates under a particular relay head. This implicitly includes +/// `K` ancestors of the head, such that the candidates pending availability in all of +/// the states of the head and the ancestors are unioned together to produce the +/// return type of this function. Each candidate hash is paired with information about +/// from where it was fetched. +/// +/// This also updates all `live_under` cached by the protocol state and returns a list +/// of up to `K` ancestors of the relay-parent. 
+#[tracing::instrument(level = "trace", skip(ctx, live_under), fields(subsystem = LOG_TARGET))] +async fn query_live_candidates( ctx: &mut Context, - state: &mut ProtocolState, - origin: PeerId, - message: AvailabilityGossipMessage, - metrics: &Metrics, -) -> Result<()> + live_under: &mut HashMap>, + relay_parent: Hash, +) -> Result<(HashMap, Vec)> where Context: SubsystemContext, { - let _timer = metrics.time_process_incoming_peer_message(); - - // obtain the set of candidates we are interested in based on our current view - let live_candidates = state.cached_live_candidates_unioned(state.view.heads.iter()); - - // check if the candidate is of interest - let candidate_entry = if live_candidates.contains(&message.candidate_hash) { - state.per_candidate - .get_mut(&message.candidate_hash) - .expect("All live candidates are contained in per_candidate; qed") - } else { - tracing::trace!( - target: LOG_TARGET, - candidate_hash = ?message.candidate_hash, - peer = %origin, - "Peer send not live candidate", - ); - modify_reputation(ctx, origin, COST_NOT_A_LIVE_CANDIDATE).await; - return Ok(()) - }; - - // Handle a duplicate before doing expensive checks. - if let Some(existing) = candidate_entry.message_vault.get(&message.erasure_chunk.index) { - let span = candidate_entry.span.child("handle-duplicate"); - // check if this particular erasure chunk was already sent by that peer before - { - let _span = span.child("check-entry"); - let received_set = candidate_entry - .received_messages - .entry(origin.clone()) - .or_default(); - - if !received_set.insert(message.erasure_chunk.index) { - modify_reputation(ctx, origin, COST_PEER_DUPLICATE_MESSAGE).await; - return Ok(()); - } - } - - // check that the message content matches what we have already before rewarding - // the peer. - { - let _span = span.child("check-accurate"); - if existing == &message { - modify_reputation(ctx, origin, BENEFIT_VALID_MESSAGE).await; - } else { - modify_reputation(ctx, origin, COST_MERKLE_PROOF_INVALID).await; - } - } - - return Ok(()); - } - - let span = { - let mut span = candidate_entry.span.child("process-new-chunk"); - span.add_string_tag("peer-id", &origin.to_base58()); - span - }; - - // check the merkle proof against the erasure root in the candidate descriptor. - let anticipated_hash = { - let _span = span.child("check-merkle-root"); - match branch_hash( - &candidate_entry.descriptor.erasure_root, - &message.erasure_chunk.proof, - message.erasure_chunk.index as usize, - ) { - Ok(hash) => hash, - Err(e) => { - tracing::trace!( - target: LOG_TARGET, - candidate_hash = ?message.candidate_hash, - peer = %origin, - error = ?e, - "Failed to calculate chunk merkle proof", - ); - modify_reputation(ctx, origin, COST_MERKLE_PROOF_INVALID).await; - return Ok(()); - }, - } - }; - - { - let _span = span.child("check-chunk-hash"); - let erasure_chunk_hash = BlakeTwo256::hash(&message.erasure_chunk.chunk); - if anticipated_hash != erasure_chunk_hash { - tracing::trace!( - target: LOG_TARGET, - candidate_hash = ?message.candidate_hash, - peer = %origin, - "Peer sent chunk with invalid merkle proof", - ); - modify_reputation(ctx, origin, COST_MERKLE_PROOF_INVALID).await; - return Ok(()); - } - } - - { - // insert into known messages and change reputation. we've guaranteed - // above that the message vault doesn't contain any message under this - // chunk index already. 
- - candidate_entry - .received_messages - .entry(origin.clone()) - .or_default() - .insert(message.erasure_chunk.index); - - modify_reputation(ctx, origin, BENEFIT_VALID_MESSAGE_FIRST).await; - - // save the chunk for our index - if Some(message.erasure_chunk.index) == candidate_entry.validator_index { - let _span = span.child("store-our-chunk"); - if store_chunk( - ctx, - message.candidate_hash, - candidate_entry.descriptor.relay_parent, - message.erasure_chunk.index, - message.erasure_chunk.clone(), - ).await?.is_err() { - tracing::warn!( - target: LOG_TARGET, - "Failed to store erasure chunk to availability store" - ); - } - } - - candidate_entry.add_message(message.erasure_chunk.index, message.clone()); - candidate_entry.drop_span_after_own_availability(); - } - - // condense the peers to the peers with interest on the candidate - let peers = { - let _span = span.child("determine-recipient-peers"); - let per_relay_parent = &state.per_relay_parent; - - state - .peer_views - .clone() - .into_iter() - .filter(|(_, view)| { - // peers view must contain the candidate hash too - cached_live_candidates_unioned( - per_relay_parent, - view.heads.iter(), - ).contains(&message.candidate_hash) - }) - .map(|(peer, _)| -> PeerId { peer.clone() }) - .filter(|peer| candidate_entry.message_required_by_peer(peer, message.erasure_chunk.index)) - .collect::>() - }; - - drop(span); - // gossip that message to interested peers - let mut batch = Vec::new(); - add_tracked_messages_to_batch(&mut batch, candidate_entry, metrics, peers, iter::once(message)); - send_batch_to_network(ctx, batch).await; - - Ok(()) -} - -/// The bitfield distribution subsystem. -pub struct AvailabilityDistributionSubsystem { - /// Pointer to a keystore, which is required for determining this nodes validator index. - keystore: SyncCryptoStorePtr, - /// Prometheus metrics. - metrics: Metrics, -} - -impl AvailabilityDistributionSubsystem { - /// Number of ancestors to keep around for the relay-chain heads. - const K: usize = 3; - - /// Create a new instance of the availability distribution. - pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { - Self { keystore, metrics } - } - - /// Start processing work as passed on from the Overseer. - async fn run(self, ctx: Context) -> Result<()> - where - Context: SubsystemContext, - { - let mut state = ProtocolState { - peer_views: HashMap::new(), - view: Default::default(), - live_under: HashMap::new(), - per_relay_parent: HashMap::new(), - per_candidate: HashMap::new(), - }; - - self.run_inner(ctx, &mut state).await - } - - /// Start processing work. - #[tracing::instrument(skip(self, ctx), fields(subsystem = LOG_TARGET))] - async fn run_inner(self, mut ctx: Context, state: &mut ProtocolState) -> Result<()> - where - Context: SubsystemContext, - { - // work: process incoming messages from the overseer. 
- loop { - let message = ctx - .recv() - .await - .map_err(|e| Error::IncomingMessageChannel(e))?; - match message { - FromOverseer::Communication { - msg: AvailabilityDistributionMessage::NetworkBridgeUpdateV1(event), - } => { - if let Err(e) = handle_network_msg( - &mut ctx, - &self.keystore.clone(), - state, - &self.metrics, - event, - ) - .await - { - tracing::warn!( - target: LOG_TARGET, - err = ?e, - "Failed to handle incoming network messages", - ); - } - } - FromOverseer::Communication { - msg: AvailabilityDistributionMessage::AvailabilityFetchingRequest(_), - } => { - // TODO: Implement issue 2306: - tracing::warn!( - target: LOG_TARGET, - "To be implemented, see: https://github.com/paritytech/polkadot/issues/2306 !", - ); - } - FromOverseer::Signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate { - activated: _, - deactivated: _, - })) => { - // handled at view change - } - FromOverseer::Signal(OverseerSignal::BlockFinalized(..)) => {} - FromOverseer::Signal(OverseerSignal::Conclude) => { - return Ok(()); - } - } - } - } -} - -impl Subsystem for AvailabilityDistributionSubsystem -where - Context: SubsystemContext + Sync + Send, -{ - fn start(self, ctx: Context) -> SpawnedSubsystem { - let future = self - .run(ctx) - .map_err(|e| SubsystemError::with_origin("availability-distribution", e)) - .boxed(); + // register one of relay parents (not the ancestors) + let ancestors = query_up_to_k_ancestors_in_same_session( + ctx, + relay_parent, + AvailabilityDistributionSubsystem::K, + ) + .await?; - SpawnedSubsystem { - name: "availability-distribution-subsystem", - future, - } - } -} + // query the ones that were not present in the live_under cache and add them + // to it. + let live_candidates = query_pending_availability_at( + ctx, + ancestors.iter().cloned().chain(iter::once(relay_parent)), + live_under, + ).await?; -/// Metadata about a candidate that is part of the live_candidates set. -/// -/// Those which were not present in a cache are "fresh" and have their candidate descriptor attached. This -/// information is propagated to the higher level where it can be used to create data entries. Cached candidates -/// already have entries associated with them, and thus don't need this metadata to be fetched. -#[derive(Debug)] -enum FetchedLiveCandidate { - Cached, - Fresh(CandidateDescriptor), + Ok((live_candidates, ancestors)) } /// Obtain all live candidates for all given `relay_blocks`. @@ -937,42 +144,6 @@ where Ok(live_candidates) } -/// Obtain all live candidates under a particular relay head. This implicitly includes -/// `K` ancestors of the head, such that the candidates pending availability in all of -/// the states of the head and the ancestors are unioned together to produce the -/// return type of this function. Each candidate hash is paired with information about -/// from where it was fetched. -/// -/// This also updates all `live_under` cached by the protocol state and returns a list -/// of up to `K` ancestors of the relay-parent. 
-#[tracing::instrument(level = "trace", skip(ctx, live_under), fields(subsystem = LOG_TARGET))] -async fn query_live_candidates( - ctx: &mut Context, - live_under: &mut HashMap>, - relay_parent: Hash, -) -> Result<(HashMap, Vec)> -where - Context: SubsystemContext, -{ - // register one of relay parents (not the ancestors) - let ancestors = query_up_to_k_ancestors_in_same_session( - ctx, - relay_parent, - AvailabilityDistributionSubsystem::K, - ) - .await?; - - // query the ones that were not present in the live_under cache and add them - // to it. - let live_candidates = query_pending_availability_at( - ctx, - ancestors.iter().cloned().chain(iter::once(relay_parent)), - live_under, - ).await?; - - Ok((live_candidates, ancestors)) -} - /// Query all hashes and descriptors of candidates pending availability at a particular block. #[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] async fn query_pending_availability(ctx: &mut Context, relay_parent: Hash) @@ -1000,239 +171,3 @@ where }) .collect()) } - -/// Modify the reputation of a peer based on its behavior. -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn modify_reputation(ctx: &mut Context, peer: PeerId, rep: Rep) -where - Context: SubsystemContext, -{ - tracing::trace!( - target: LOG_TARGET, - rep = ?rep, - peer_id = ?peer, - "Reputation change for peer", - ); - ctx.send_message(AllMessages::NetworkBridge( - NetworkBridgeMessage::ReportPeer(peer, rep), - )).await; -} - -/// Query the proof of validity for a particular candidate hash. -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_data_availability(ctx: &mut Context, candidate_hash: CandidateHash) -> Result -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - ctx.send_message(AllMessages::AvailabilityStore( - AvailabilityStoreMessage::QueryDataAvailability(candidate_hash, tx), - )).await; - - rx.await.map_err(|e| Error::QueryAvailabilityResponseChannel(e)) -} - -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_chunk( - ctx: &mut Context, - candidate_hash: CandidateHash, - validator_index: ValidatorIndex, -) -> Result> -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - ctx.send_message(AllMessages::AvailabilityStore( - AvailabilityStoreMessage::QueryChunk(candidate_hash, validator_index, tx), - )).await; - - rx.await.map_err(|e| Error::QueryChunkResponseChannel(e)) -} - -#[tracing::instrument(level = "trace", skip(ctx, erasure_chunk), fields(subsystem = LOG_TARGET))] -async fn store_chunk( - ctx: &mut Context, - candidate_hash: CandidateHash, - relay_parent: Hash, - validator_index: ValidatorIndex, - erasure_chunk: ErasureChunk, -) -> Result> -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - ctx.send_message(AllMessages::AvailabilityStore( - AvailabilityStoreMessage::StoreChunk { - candidate_hash, - relay_parent, - chunk: erasure_chunk, - tx, - } - )).await; - - rx.await.map_err(|e| Error::StoreChunkResponseChannel(e)) -} - -/// Query the validator set. 
-#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_validators( - ctx: &mut Context, - relay_parent: Hash, -) -> Result> -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - let query_validators = AllMessages::RuntimeApi(RuntimeApiMessage::Request( - relay_parent, - RuntimeApiRequest::Validators(tx), - )); - - ctx.send_message(query_validators) - .await; - rx.await - .map_err(|e| Error::QueryValidatorsResponseChannel(e))? - .map_err(|e| Error::QueryValidators(e)) -} - -/// Query the hash of the `K` ancestors -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_k_ancestors( - ctx: &mut Context, - relay_parent: Hash, - k: usize, -) -> Result> -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - let query_ancestors = AllMessages::ChainApi(ChainApiMessage::Ancestors { - hash: relay_parent, - k, - response_channel: tx, - }); - - ctx.send_message(query_ancestors) - .await; - rx.await - .map_err(|e| Error::QueryAncestorsResponseChannel(e))? - .map_err(|e| Error::QueryAncestors(e)) -} - -/// Query the session index of a relay parent -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_session_index_for_child( - ctx: &mut Context, - relay_parent: Hash, -) -> Result -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - let query_session_idx_for_child = AllMessages::RuntimeApi(RuntimeApiMessage::Request( - relay_parent, - RuntimeApiRequest::SessionIndexForChild(tx), - )); - - ctx.send_message(query_session_idx_for_child) - .await; - rx.await - .map_err(|e| Error::QuerySessionResponseChannel(e))? - .map_err(|e| Error::QuerySession(e)) -} - -/// Queries up to k ancestors with the constraints of equiv session -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_up_to_k_ancestors_in_same_session( - ctx: &mut Context, - relay_parent: Hash, - k: usize, -) -> Result> -where - Context: SubsystemContext, -{ - // k + 1 since we always query the child's session index - // ordering is [parent, grandparent, greatgrandparent, greatgreatgrandparent, ...] - let ancestors = query_k_ancestors(ctx, relay_parent, k + 1).await?; - let desired_session = query_session_index_for_child(ctx, relay_parent).await?; - // we would only need `ancestors.len() - 1`, but the one extra could avoid a re-alloc - // if the consumer wants to push the `relay_parent` onto it too and does not hurt otherwise - let mut acc = Vec::with_capacity(ancestors.len()); - - // iterate from youngest to oldest - let mut iter = ancestors.into_iter().peekable(); - - while let Some((ancestor, ancestor_parent)) = iter.next().and_then(|a| iter.peek().map(|ap| (a, ap))) { - if query_session_index_for_child(ctx, *ancestor_parent).await? != desired_session { - break; - } - acc.push(ancestor); - } - - debug_assert!(acc.len() <= k); - Ok(acc) -} - -#[derive(Clone)] -struct MetricsInner { - gossipped_availability_chunks: prometheus::Counter, - handle_our_view_change: prometheus::Histogram, - process_incoming_peer_message: prometheus::Histogram, -} - -/// Availability Distribution metrics. -#[derive(Default, Clone)] -pub struct Metrics(Option); - -impl Metrics { - fn on_chunk_distributed(&self) { - if let Some(metrics) = &self.0 { - metrics.gossipped_availability_chunks.inc(); - } - } - - /// Provide a timer for `handle_our_view_change` which observes on drop. 
- fn time_handle_our_view_change(&self) -> Option { - self.0.as_ref().map(|metrics| metrics.handle_our_view_change.start_timer()) - } - - /// Provide a timer for `process_incoming_peer_message` which observes on drop. - fn time_process_incoming_peer_message(&self) -> Option { - self.0.as_ref().map(|metrics| metrics.process_incoming_peer_message.start_timer()) - } -} - -impl metrics::Metrics for Metrics { - fn try_register( - registry: &prometheus::Registry, - ) -> std::result::Result { - let metrics = MetricsInner { - gossipped_availability_chunks: prometheus::register( - prometheus::Counter::new( - "parachain_gossipped_availability_chunks_total", - "Number of availability chunks gossipped to other peers.", - )?, - registry, - )?, - handle_our_view_change: prometheus::register( - prometheus::Histogram::with_opts( - prometheus::HistogramOpts::new( - "parachain_availability_distribution_handle_our_view_change", - "Time spent within `availability_distribution::handle_our_view_change`", - ) - )?, - registry, - )?, - process_incoming_peer_message: prometheus::register( - prometheus::Histogram::with_opts( - prometheus::HistogramOpts::new( - "parachain_availability_distribution_process_incoming_peer_message", - "Time spent within `availability_distribution::process_incoming_peer_message`", - ) - )?, - registry, - )?, - }; - Ok(Metrics(Some(metrics))) - } -} diff --git a/primitives/src/v0.rs b/primitives/src/v0.rs index d806e4a93c71..951624566172 100644 --- a/primitives/src/v0.rs +++ b/primitives/src/v0.rs @@ -114,7 +114,8 @@ impl MallocSizeOf for ValidatorId { } /// Index of the validator is used as a lightweight replacement of the `ValidatorId` when appropriate. -pub type ValidatorIndex = u32; +#[derive(Debug, Eq, Ord, PartialEq, PartialOrd, Hash)] +pub struct ValidatorIndex(u32); application_crypto::with_pair! { /// A Parachain validator keypair. From da850f7c07ab0824b5f8a2e6b98f4fa18e468ab4 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 9 Feb 2021 20:19:33 +0100 Subject: [PATCH 02/60] availability distribution, still very wip. Work on the requesting side of things. --- .../availability-distribution/src/error.rs | 58 +++++ .../src/fetch_task.rs | 102 +++++++++ .../availability-distribution/src/lib.rs | 201 ++++++------------ .../availability-distribution/src/state.rs | 173 +++++++++++++++ 4 files changed, 393 insertions(+), 141 deletions(-) create mode 100644 node/network/availability-distribution/src/error.rs create mode 100644 node/network/availability-distribution/src/fetch_task.rs create mode 100644 node/network/availability-distribution/src/state.rs diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs new file mode 100644 index 000000000000..f70b2876a5ad --- /dev/null +++ b/node/network/availability-distribution/src/error.rs @@ -0,0 +1,58 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. 
If not, see . + +#[derive(Debug, Error)] +enum Error { + #[error("Response channel to obtain StoreChunk failed")] + StoreChunkResponseChannel(#[source] oneshot::Canceled), + + #[error("Response channel to obtain QueryChunk failed")] + QueryChunkResponseChannel(#[source] oneshot::Canceled), + + #[error("Response channel to obtain QueryAncestors failed")] + QueryAncestorsResponseChannel(#[source] oneshot::Canceled), + #[error("RuntimeAPI to obtain QueryAncestors failed")] + QueryAncestors(#[source] ChainApiError), + + #[error("Response channel to obtain QuerySession failed")] + QuerySessionResponseChannel(#[source] oneshot::Canceled), + #[error("RuntimeAPI to obtain QuerySession failed")] + QuerySession(#[source] RuntimeApiError), + + #[error("Response channel to obtain QueryValidators failed")] + QueryValidatorsResponseChannel(#[source] oneshot::Canceled), + #[error("RuntimeAPI to obtain QueryValidators failed")] + QueryValidators(#[source] RuntimeApiError), + + #[error("Response channel to obtain AvailabilityCores failed")] + AvailabilityCoresResponseChannel(#[source] oneshot::Canceled), + #[error("RuntimeAPI to obtain AvailabilityCores failed")] + AvailabilityCores(#[source] RuntimeApiError), + + #[error("Response channel to obtain AvailabilityCores failed")] + QueryAvailabilityResponseChannel(#[source] oneshot::Canceled), + + #[error("Receive channel closed")] + IncomingMessageChannel(#[source] SubsystemError), +} + +type Result = std::result::Result; + +impl From for Error { + fn from(err: SubsystemError) -> Self { + Self::IncomingMessageChannel(err) + } +} diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs new file mode 100644 index 000000000000..8b583cb68f95 --- /dev/null +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -0,0 +1,102 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +struct FetchTask { + /// For what relay parents this task is relevant. + /// + /// In other words, for which relay chain parents this candidate is considered live. + /// This is updated on every `ActiveLeavesUpdate` and enables us to know when we can safely + /// stop keeping track of that candidate/chunk. + live_in: HashSet, + + /// The relay parent providing the context for the candidate. + relay_parent: Hash, + + /// Some details about the to be fetched candidate. + descriptor: CandidateDescriptor, + + /// We keep the task around in state `Fetched` until `live_in` becomes empty, to make + /// sure we won't re-fetch an already fetched candidate. + state: FetchedState, +} + +/// State of a particular candidate chunk fetching process. +enum FetchedState { + /// Chunk is currently being fetched. + Fetching, + /// Chunk has already been fetched successfully. + Fetched, + /// All relevant live_in have been removed, before we were able to get our chunk. 
+ Canceled, +} + +impl FetchTask { + /// Start fetching a chunk. + pub async fn start(ctx: &mut Context, leaf: Hash, core: OccupiedCore) -> Self { + } + + /// Add the given leaf to the relay parents which are making this task relevant. + pub fn add_leaf(&mut self, leaf: Hash) { + self.live_in.insert(leaf); + } + + /// Remove leaves and cancel the task, if it was the last one and the task has still been + /// fetching. + pub fn remove_leaves(&mut self, leaves: HashSet) { + self.live_in.difference(leaves); + if self.live_in.is_empty() { + // TODO: Make sure, to actually cancel the task. + self.state = FetchedState::Canceled + } + } + + /// Whether or not this task can be considered finished. + /// + /// That is, it is either canceled or succeeded fetching the chunk. + pub fn is_finished(&self) -> bool { + match state { + FetchedState::Fetched | FetchedState::Canceled => true, + FetchedState::Fetching => false, + } + } + + /// Retrieve the relay parent providing the context for this candidate. + pub fn get_relay_parent(&self) -> Hash { + self.relay_parent + } +} + +/// Query the session index of a relay parent +#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] +async fn query_session_index_for_child( + ctx: &mut Context, + relay_parent: Hash, +) -> Result +where + Context: SubsystemContext, +{ + let (tx, rx) = oneshot::channel(); + let query_session_idx_for_child = AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionIndexForChild(tx), + )); + + ctx.send_message(query_session_idx_for_child) + .await; + rx.await + .map_err(|e| Error::QuerySessionResponseChannel(e))? + .map_err(|e| Error::QuerySession(e)) +} diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 9cd1f81337ab..47115bece0ea 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -1,4 +1,3 @@ - // Copyright 2021 Parity Technologies (UK) Ltd. // This file is part of Polkadot. @@ -15,159 +14,79 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -/// The bitfield distribution subsystem. -pub struct AvailabilityDistributionSubsystem { - /// Pointer to a keystore, which is required for determining this nodes validator index. - keystore: SyncCryptoStorePtr, - /// Prometheus metrics. - metrics: Metrics, -} -/// Metadata about a candidate that is part of the live_candidates set. -/// -/// Those which were not present in a cache are "fresh" and have their candidate descriptor attached. This -/// information is propagated to the higher level where it can be used to create data entries. Cached candidates -/// already have entries associated with them, and thus don't need this metadata to be fetched. -#[derive(Debug)] -enum FetchedLiveCandidate { - Cached, - Fresh(CandidateDescriptor), -} +/// Error and [`Result`] type for this subsystem. +mod error; +pub use error::Error; +use error::Result; -struct ProtocolState { - /// Candidates we need to fetch our chunk for. - chunks_to_fetch: HashMap, +/// The actual implementation of running availability distribution. +mod state; +/// State of a running availability-distribution subsystem. +use state::ProtocolState; - /// Localized information about sessions we are currently interested in. - /// - /// This is usually the current one and at session boundaries also the last one. 
- session_infos: HashMap, +/// A task fetching a particular chunk. +mod fetch_task; -} +const LOG_TARGET: &'static str = "availability_distribution"; -/// Localized session information, tailored for the needs of availability distribution. -struct SessionInfo { - /// For each core we maintain a randomized list of corresponding validators. - /// - /// This is so we can query them for chunks, trying them in order. As each validator will - /// have a randomized ordering, we should get good load balancing. - validator_groups: Vec>, -} -struct ChunkFetchingInfo { - descriptor: CandidateDescriptor, - /// Validators that backed the candidate and hopefully have our chunk. - backing_group: Vec, -} - -fn run() { - /// Get current heads - /// For each chunk/slot, update randomized list of validators to query on session bundaries. - /// Fetch pending availability candidates and add them to `chunks_to_fetch`. +/// The bitfield distribution subsystem. +pub struct AvailabilityDistributionSubsystem { + /// Pointer to a keystore, which is required for determining this nodes validator index. + keystore: SyncCryptoStorePtr, + /// Prometheus metrics. + metrics: Metrics, } -/// Obtain all live candidates under a particular relay head. This implicitly includes -/// `K` ancestors of the head, such that the candidates pending availability in all of -/// the states of the head and the ancestors are unioned together to produce the -/// return type of this function. Each candidate hash is paired with information about -/// from where it was fetched. -/// -/// This also updates all `live_under` cached by the protocol state and returns a list -/// of up to `K` ancestors of the relay-parent. -#[tracing::instrument(level = "trace", skip(ctx, live_under), fields(subsystem = LOG_TARGET))] -async fn query_live_candidates( - ctx: &mut Context, - live_under: &mut HashMap>, - relay_parent: Hash, -) -> Result<(HashMap, Vec)> +impl Subsystem for AvailabilityDistributionSubsystem where - Context: SubsystemContext, + Context: SubsystemContext + Sync + Send, { - // register one of relay parents (not the ancestors) - let ancestors = query_up_to_k_ancestors_in_same_session( - ctx, - relay_parent, - AvailabilityDistributionSubsystem::K, - ) - .await?; - - // query the ones that were not present in the live_under cache and add them - // to it. - let live_candidates = query_pending_availability_at( - ctx, - ancestors.iter().cloned().chain(iter::once(relay_parent)), - live_under, - ).await?; - - Ok((live_candidates, ancestors)) + fn start(self, ctx: Context) -> SpawnedSubsystem { + let future = self + .run(ctx, ProtocolState::new()) + .map_err(|e| SubsystemError::with_origin("availability-distribution", e)) + .boxed(); + + SpawnedSubsystem { + name: "availability-distribution-subsystem", + future, + } + } } -/// Obtain all live candidates for all given `relay_blocks`. -/// -/// This returns a set of all candidate hashes pending availability within the state -/// of the explicitly referenced relay heads. -/// -/// This also queries the provided `live_under` cache before reaching into the -/// runtime and updates it with the information learned. 
-#[tracing::instrument(level = "trace", skip(ctx, relay_blocks, live_under), fields(subsystem = LOG_TARGET))] -async fn query_pending_availability_at( - ctx: &mut Context, - relay_blocks: impl IntoIterator, - live_under: &mut HashMap>, -) -> Result> -where - Context: SubsystemContext, -{ - let mut live_candidates = HashMap::new(); - - // fetch and fill out cache for each of these - for relay_parent in relay_blocks { - let receipts_for = match live_under.entry(relay_parent) { - Entry::Occupied(e) => { - live_candidates.extend( - e.get().iter().cloned().map(|c| (c, FetchedLiveCandidate::Cached)) - ); - continue - }, - e => e.or_default(), - }; - - for (receipt_hash, descriptor) in query_pending_availability(ctx, relay_parent).await? { - // unfortunately we have no good way of telling the candidate was - // cached until now. But we don't clobber a `Cached` entry if there - // is one already. - live_candidates.entry(receipt_hash).or_insert(FetchedLiveCandidate::Fresh(descriptor)); - receipts_for.insert(receipt_hash); - } + +impl AvailabilityDistributionSubsystem { + /// Create a new instance of the availability distribution. + pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { + Self { keystore, metrics } } - Ok(live_candidates) + /// Start processing work as passed on from the Overseer. + async fn run(self, ctx: Context, state: &mut ProtocolState) -> Result<()> { + loop { + let message = ctx.recv().await?; + match message { + FromOverseer::Signal(OverseerSignal::ActiveLeaves(update)) => { + // Update the relay chain heads we are fetching our pieces for: + state.update_fetching_heads(&mut ctx, update)?; + } + FromOverseer::Signal(OverseerSignal::BlockFinalized(..)) => {} + FromOverseer::Signal(OverseerSignal::Conclude) => { + return Ok(()); + } + FromOverseer::Communication { + msg: AvailabilityDistributionMessage::AvailabilityFetchingRequest(_), + } => { + // TODO: Implement issue 2306: + tracing::warn!( + target: LOG_TARGET, + "To be implemented, see: https://github.com/paritytech/polkadot/issues/2306!", + ); + } + } + } + } } -/// Query all hashes and descriptors of candidates pending availability at a particular block. -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_pending_availability(ctx: &mut Context, relay_parent: Hash) - -> Result> -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - ctx.send_message(AllMessages::RuntimeApi(RuntimeApiMessage::Request( - relay_parent, - RuntimeApiRequest::AvailabilityCores(tx), - ))) - .await; - - let cores: Vec<_> = rx - .await - .map_err(|e| Error::AvailabilityCoresResponseChannel(e))? - .map_err(|e| Error::AvailabilityCores(e))?; - - Ok(cores.into_iter() - .filter_map(|core_state| if let CoreState::Occupied(occupied) = core_state { - Some((occupied.candidate_hash, occupied.candidate_descriptor)) - } else { - None - }) - .collect()) -} diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs new file mode 100644 index 000000000000..e9fcb7829134 --- /dev/null +++ b/node/network/availability-distribution/src/state.rs @@ -0,0 +1,173 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+ +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! `ProtocolState` representing a running availability distribution subsystem. + +use itertools::{Itertools, Either} + +use super::{Result, LOG_TARGET}; + +/// A running instance of this subsystem. +struct ProtocolState { + /// Candidates we need to fetch our chunk for. + fetches: HashMap, + + /// Localized information about sessions we are currently interested in. + /// + /// This is usually the current one and at session boundaries also the last one. + live_sessions: HashMap, +} + +/// Localized session information, tailored for the needs of availability distribution. +struct SessionInfo { + /// Validator groups of the current session. + /// + /// Each group's order is randomized. This way we achieve load balancing when requesting + /// chunks, as the validators in a group will be tried in that randomized order. Each node + /// should arrive at a different order, therefore we distribute the load. + validator_groups: Vec>, + + /// Information about ourself: + validator_id: ValidatorId, + + /// The relay parents we are keeping this entry for. + live_in: HashSet, +} + +struct ChunkFetchingInfo { + descriptor: CandidateDescriptor, + /// Validators that backed the candidate and hopefully have our chunk. + backing_group: Vec, +} + +impl ProtocolState { + /// Update heads that need availability distribution. + /// + /// For all active heads we will be fetching our chunk for availabilty distribution. + pub(crate) fn update_fetching_heads( + &mut self, + ctx: &mut Context, + update: ActiveLeavesUpdate, + ) -> Result<()> { + let ActiveLeavesUpdate { + activated, + deactivated, + } = update; + // Order important! We need to handle activated, prior to deactivated, otherwise we might + // cancel still needed jobs. + self.start_requesting_chunks(ctx, activated)?; + let dead_parents = self.stop_requesting_chunks(ctx, deactivated)?; + } + + /// Start requesting chunks for newly imported heads. + fn start_requesting_chunks( + &mut self, + ctx: &mut Context, + new_heads: &SmallVec<[(Hash, Arc)]>, + ) -> Result<()> { + for (leaf, _) in new_heads { + let cores = query_occupied_cores(ctx, leaf).await?; + add_cores(cores)?; + } + Ok(()) + } + + /// Stop requesting chunks for obsolete heads. + /// + /// Returns relay_parents which became irrelevant for availability fetching (are not + /// referenced by any candidate anymore). + fn stop_requesting_chunks( + &mut self, + ctx: &mut Context, + obsolete_leaves: &SmallVec<[(Hash, Arc)]>, + ) -> Result> { + let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().map(|h| h.0).collect(); + let (obsolete_parents, new_fetches): (HashSet<_>, HashMap<_>) = + self.fetches.into_iter().partition_map(|(c_hash, task)| { + task.remove_leaves(HashSet::from(obsolete_leaves)); + if task.is_finished() { + Either::Left(task.get_relay_parent()) + } else { + Either::Right((c_hash, task)) + } + }); + self.fetches = new_fetches; + obsolete_parents + } + + /// Add candidates corresponding for a particular relay parent. + /// + /// Starting requests where necessary. + /// + /// Note: The passed in `leaf` is not the same as CandidateDescriptor::relay_parent in the + /// given cores. 
The latter is the relay_parent this candidate considers its parent, while the + /// passed in leaf might be some later block where the candidate is still pending availability. + fn add_cores( + &mut self, + ctx: &mut Context, + leaf: Hash, + cores: impl IntoIter, + ) { + for core in cores { + match self.fetches.entry(core.candidate_hash) { + Entry::Occupied(e) => + // Just book keeping - we are already requesting that chunk: + e.relay_parents.insert(leaf), + Entry::Vacant(e) => { + e.insert(FetchTask::start(ctx, leaf, core)) + } + } + } + } +} + +/// Start requesting our chunk for the given core. +fn start_request_chunk(core: OccupiedCore) -> FetchTask { + panic!("TODO: To be implemented!"); +} + +/// Query all hashes and descriptors of candidates pending availability at a particular block. +#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] +async fn query_occupied_cores( + ctx: &mut Context, + relay_parent: Hash, +) -> Result> +where + Context: SubsystemContext, +{ + let (tx, rx) = oneshot::channel(); + ctx.send_message(AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::AvailabilityCores(tx), + ))) + .await; + + let cores: Vec<_> = rx + .await + .map_err(|e| Error::AvailabilityCoresResponseChannel(e))? + .map_err(|e| Error::AvailabilityCores(e))?; + + Ok(cores + .into_iter() + .filter_map(|core_state| { + if let CoreState::Occupied(occupied) = core_state { + Some(occupied) + } else { + None + } + }) + .collect()) +} From fbf0ec1a656b9a7afdfc9e9b282de816ac15582e Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 9 Feb 2021 21:29:04 +0100 Subject: [PATCH 03/60] Some docs on what I intend to do. --- .../availability-distribution/src/state.rs | 40 ++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index e9fcb7829134..4e71032c9411 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -15,7 +15,43 @@ // along with Polkadot. If not, see . //! `ProtocolState` representing a running availability distribution subsystem. - +//! +//! We keep track of [`FetchTask`]s, which get created on [`ActiveLeavesUpdate`]s for each occupied +//! core in the leaves, if we have not yet created it before. We keep track for which +//! relay parents a `FetchTask` is considered live (corresponding slot is occupied with the +//! candidate fetched). Once there is no relay parent left for which that task is considered live, +//! it gets removed. +//! +//! We keep that task around as long as its corresponding candidate is considered pending +//! availability, even if we fetched our chunk already. This is so we won't fetch our piece again, +//! just because the candidate is still pending availability in the next block. +//! +//! We are also dependent on session information. We need to know which validators are in a +//! particular validator group, backing our candidate, so we can request our erasure chunk from +//! them. +//! +//! We want to randomize the list of validators in each group, so we get a +//! random order of validators to try to get the chunk from. This is to ensure load balancing, each +//! requesting validator should have a different order, thus trying different validators. +//! +//! But We would like to keep that randomized order around for an entire session, so our particular +//! 
validator will always request from the same validators, thus making sure it will find an open +//! network connection on each request. +//! +//! (TODO: What to do on session boundaries? Initial delay acceptable? Connect with some fake +//! request to future validators? Use a peer set after all and connect that to the future session?) +//! +//! So we need to keep some customized session info around, which seems to be a good idea for +//! performance reasons anyway. That's where `SessionCache` comes into play. It is used to keep +//! session information around as long as we need it. But how long do we need it? How do we manage +//! that cache? We can't rely on `ActiveLeavesUpdate`s heads alone, as we might get occupied slots +//! for heads we never got an `ActiveLeavesUpdate` from, therefore we don't populate the session +//! cache with sessions our leaves correspond to, but directly with the sessions of the relay +//! parents of our `CandidateDescriptor`s. So, its clear how to populate the cache, but when can we +//! get rid of cached session information? Easy! When there is no candidate/FetchTask around +//! anymore which references it. Thus the cache simply consists of `Weak` pointers to the actual +//! session infos and the `FetchTask`s keep `Rc`s, therefore we know exactly when we can get rid of +//! a cache entry by means of the Weak pointer evaluating to `None`. use itertools::{Itertools, Either} use super::{Result, LOG_TARGET}; @@ -28,7 +64,7 @@ struct ProtocolState { /// Localized information about sessions we are currently interested in. /// /// This is usually the current one and at session boundaries also the last one. - live_sessions: HashMap, + live_sessions: HashMap>, } /// Localized session information, tailored for the needs of availability distribution. From ac543c1e394f88e931641503324b02d6b3156b6c Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Wed, 10 Feb 2021 15:49:48 +0100 Subject: [PATCH 04/60] Checkpoint of session cache implementation as I will likely replace it with something smarter. --- .../availability-distribution/src/error.rs | 7 ++ .../src/fetch_task.rs | 6 ++ .../availability-distribution/src/lib.rs | 3 + .../src/session_cache.rs | 100 ++++++++++++++++++ .../availability-distribution/src/state.rs | 27 ++--- 5 files changed, 122 insertions(+), 21 deletions(-) create mode 100644 node/network/availability-distribution/src/session_cache.rs diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index f70b2876a5ad..3e7b1cee07af 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -13,6 +13,9 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . +// + +use subsystem_util::Error as UtilError; #[derive(Debug, Error)] enum Error { @@ -47,6 +50,10 @@ enum Error { #[error("Receive channel closed")] IncomingMessageChannel(#[source] SubsystemError), + + /// Some request to the runtime in the session cache failed. 
+ #[error("Session cache runtime request failed")] + SessionCacheRuntimRequest(#[source] UtilError), } type Result = std::result::Result; diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index 8b583cb68f95..ff329993508c 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -14,6 +14,10 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . +use std::rc::Rc; + +use super::session_cache::SessionInfo; + struct FetchTask { /// For what relay parents this task is relevant. /// @@ -31,6 +35,8 @@ struct FetchTask { /// We keep the task around in state `Fetched` until `live_in` becomes empty, to make /// sure we won't re-fetch an already fetched candidate. state: FetchedState, + + session: Rc } /// State of a particular candidate chunk fetching process. diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 47115bece0ea..7c7c4c9cc64c 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -28,6 +28,9 @@ use state::ProtocolState; /// A task fetching a particular chunk. mod fetch_task; +/// Cache for session information. +mod session_cache; + const LOG_TARGET: &'static str = "availability_distribution"; diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs new file mode 100644 index 000000000000..a0c2acc0a84e --- /dev/null +++ b/node/network/availability-distribution/src/session_cache.rs @@ -0,0 +1,100 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use std::rc::Weak; + +use super::{LOG_TARGET, error::Result, Error}; + +/// Caching of session info as needed by availability distribution. +/// +/// It should be ensured that a cached session stays live in the cache as long as we might need it. +/// A warning will be logged, if an already dead entry gets fetched. +struct SessionCache { + /// Maintain caches for session information for currently relay parents of interest. + /// + /// Fast path - if we have an entry here, no query to the runtime is necessary at all. + by_relay_parent: HashMap, + + /// Look up cached sessions by SessionIndex. + /// + /// Slower path - we still have to look up the `SessionIndex` in the runtime, but still might have + /// the session ready already. + /// + /// Note: Performance of fetching is really secondary here, but we need to ensure we are going + /// to get any existing cache entry, before fetching new information, as we should not mess up + /// the order of validators. + by_session_index: HashMap, +} + +/// Localized session information, tailored for the needs of availability distribution. 
+pub struct SessionInfo { + /// Validator groups of the current session. + /// + /// Each group's order is randomized. This way we achieve load balancing when requesting + /// chunks, as the validators in a group will be tried in that randomized order. Each node + /// should arrive at a different order, therefore we distribute the load. + pub validator_groups: Vec>, + + /// Information about ourself: + pub our_index: ValidatorIndex, +} + +impl SessionCache { + + /// Retrieve session info for the given relay parent. + /// + /// This function will query the cache first and will only query the runtime on cache miss. + pub fn fetch_session_info(&mut self, ctx: &mut Context, relay_parent: Hash) -> Result> { + if let Some(info) = self.get_by_relay_parent(relay_parent) { + return Ok(info) + } + let session_index = request_session_index_for_child_ctx(parent, ctx).await + .map_err(|e| Error::SessionCacheRuntimRequest(e))?; + if let Some(info) = self.get_by_session_index(session_index) { + self.by_relay_parent.insert(relay_parent, info.downgrade); + return Ok(info); + } + + } + /// Get session info for a particular relay parent. + /// + /// Returns: None, if no entry for that relay parent exists in the cache (or it was dead + /// already - which should not happen.) + fn get_by_relay_parent(&self, relay_parent: Hash) -> Option> { + let weak_ref = self.by_relay_parent.get(relay_parent)?; + upgrade_report_dead(weak_ref) + } + + /// Get session info for a given `SessionIndex`. + fn get_by_session_index(&self, session_id: SessionId) -> Option> { + let weak_ref = self.by_session_id.get(session_id)?; + upgrade_report_dead(weak_ref) + } +} + +/// Upgrade a weak SessionInfo reference. +/// +/// Warn if it was dead already, as this should not happen. Cache should stay valid at least as +/// long as we need it. +fn upgrade_report_dead(info: Weak>) -> Option> { + match weak_ref.upgrade() { + Some(info) => Some(info), + None => { + tracing::warn!(LOG_TARGET, relay_parent, "A no longer cached session got requested, this should not happen in normal operation."); + None + } + } +} diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index 4e71032c9411..7c6760b51862 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -48,13 +48,13 @@ //! for heads we never got an `ActiveLeavesUpdate` from, therefore we don't populate the session //! cache with sessions our leaves correspond to, but directly with the sessions of the relay //! parents of our `CandidateDescriptor`s. So, its clear how to populate the cache, but when can we -//! get rid of cached session information? Easy! When there is no candidate/FetchTask around -//! anymore which references it. Thus the cache simply consists of `Weak` pointers to the actual -//! session infos and the `FetchTask`s keep `Rc`s, therefore we know exactly when we can get rid of -//! a cache entry by means of the Weak pointer evaluating to `None`. +//! get rid of cached session information? If for sure is safe to do when there is no +//! candidate/FetchTask around anymore which references it. Thus the cache simply consists of +//! `Weak` pointers to the actual session infos and the `FetchTask`s keep `Rc`s, therefore we know +//! exactly when we can get rid of a cache entry by means of the Weak pointer evaluating to `None`. 
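The `Weak`/`Rc` ownership scheme described in the comment above can be illustrated with plain `std` types. The following is only a minimal sketch of that idea, using hypothetical `Cache` and `Session` types rather than the subsystem's actual ones: fetch tasks hold `Rc`s, the cache holds only `Weak` pointers, so an entry becomes collectable exactly when the last task referencing it is dropped.

```rust
use std::collections::HashMap;
use std::rc::{Rc, Weak};

// Hypothetical stand-ins for SessionIndex / SessionInfo.
type SessionIndex = u32;
struct Session { _validators: Vec<u32> }

#[derive(Default)]
struct Cache {
    by_index: HashMap<SessionIndex, Weak<Session>>,
}

impl Cache {
    /// Return a still-live entry, or insert one produced by `make` and hand out an `Rc`.
    fn get_or_insert(&mut self, index: SessionIndex, make: impl FnOnce() -> Session) -> Rc<Session> {
        if let Some(live) = self.by_index.get(&index).and_then(Weak::upgrade) {
            return live;
        }
        let fresh = Rc::new(make());
        self.by_index.insert(index, Rc::downgrade(&fresh));
        fresh
    }

    /// Drop map entries whose sessions are no longer referenced by any task.
    fn bury_dead(&mut self) {
        self.by_index.retain(|_, weak| weak.upgrade().is_some());
    }
}

fn main() {
    let mut cache = Cache::default();
    // A "fetch task" keeps the strong reference.
    let task_session = cache.get_or_insert(1, || Session { _validators: vec![0, 1, 2] });
    cache.bury_dead();
    assert_eq!(cache.by_index.len(), 1); // still referenced by the task
    drop(task_session);
    cache.bury_dead();
    assert!(cache.by_index.is_empty()); // last Rc gone, entry can be reclaimed
}
```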
use itertools::{Itertools, Either} -use super::{Result, LOG_TARGET}; +use super::{Result, LOG_TARGET, session_cache::SessionCache}; /// A running instance of this subsystem. struct ProtocolState { @@ -64,24 +64,9 @@ struct ProtocolState { /// Localized information about sessions we are currently interested in. /// /// This is usually the current one and at session boundaries also the last one. - live_sessions: HashMap>, + session_cache: SessionCache, } -/// Localized session information, tailored for the needs of availability distribution. -struct SessionInfo { - /// Validator groups of the current session. - /// - /// Each group's order is randomized. This way we achieve load balancing when requesting - /// chunks, as the validators in a group will be tried in that randomized order. Each node - /// should arrive at a different order, therefore we distribute the load. - validator_groups: Vec>, - - /// Information about ourself: - validator_id: ValidatorId, - - /// The relay parents we are keeping this entry for. - live_in: HashSet, -} struct ChunkFetchingInfo { descriptor: CandidateDescriptor, From 07f6bc32ae14ac2f947ba127f3d401386a0a2f3d Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 11 Feb 2021 18:27:53 +0100 Subject: [PATCH 05/60] More work, mostly on cache and getting things to type check. --- .../availability-distribution/Cargo.toml | 2 + .../availability-distribution/src/error.rs | 31 ++--- .../src/fetch_task.rs | 58 +++++----- .../availability-distribution/src/lib.rs | 17 ++- .../src/session_cache.rs | 108 +++++++++++++++--- .../availability-distribution/src/state.rs | 78 +++++++------ node/subsystem-util/src/lib.rs | 2 +- primitives/src/v0.rs | 4 +- 8 files changed, 190 insertions(+), 110 deletions(-) diff --git a/node/network/availability-distribution/Cargo.toml b/node/network/availability-distribution/Cargo.toml index add6b2c43d33..1835078a3683 100644 --- a/node/network/availability-distribution/Cargo.toml +++ b/node/network/availability-distribution/Cargo.toml @@ -14,9 +14,11 @@ polkadot-erasure-coding = { path = "../../../erasure-coding" } polkadot-subsystem = { package = "polkadot-node-subsystem", path = "../../subsystem" } polkadot-node-network-protocol = { path = "../../network/protocol" } polkadot-node-subsystem-util = { path = "../../subsystem-util" } +polkadot-node-core-runtime-api = { path = "../../core/runtime-api" } sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", features = ["std"] } sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } thiserror = "1.0.23" +rand = "0.8.3" [dev-dependencies] polkadot-subsystem-testhelpers = { package = "polkadot-node-subsystem-test-helpers", path = "../../subsystem-test-helpers" } diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index 3e7b1cee07af..68551dbad62f 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -15,36 +15,21 @@ // along with Polkadot. If not, see . 
// -use subsystem_util::Error as UtilError; +use thiserror::Error; + +use futures::channel::oneshot; + +use polkadot_node_subsystem_util::Error as UtilError; +use polkadot_subsystem::{SubsystemError}; #[derive(Debug, Error)] -enum Error { +pub enum Error { #[error("Response channel to obtain StoreChunk failed")] StoreChunkResponseChannel(#[source] oneshot::Canceled), #[error("Response channel to obtain QueryChunk failed")] QueryChunkResponseChannel(#[source] oneshot::Canceled), - #[error("Response channel to obtain QueryAncestors failed")] - QueryAncestorsResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain QueryAncestors failed")] - QueryAncestors(#[source] ChainApiError), - - #[error("Response channel to obtain QuerySession failed")] - QuerySessionResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain QuerySession failed")] - QuerySession(#[source] RuntimeApiError), - - #[error("Response channel to obtain QueryValidators failed")] - QueryValidatorsResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain QueryValidators failed")] - QueryValidators(#[source] RuntimeApiError), - - #[error("Response channel to obtain AvailabilityCores failed")] - AvailabilityCoresResponseChannel(#[source] oneshot::Canceled), - #[error("RuntimeAPI to obtain AvailabilityCores failed")] - AvailabilityCores(#[source] RuntimeApiError), - #[error("Response channel to obtain AvailabilityCores failed")] QueryAvailabilityResponseChannel(#[source] oneshot::Canceled), @@ -56,7 +41,7 @@ enum Error { SessionCacheRuntimRequest(#[source] UtilError), } -type Result = std::result::Result; +pub type Result = std::result::Result; impl From for Error { fn from(err: SubsystemError) -> Self { diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index ff329993508c..d84d24001aa9 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -15,10 +15,26 @@ // along with Polkadot. If not, see . use std::rc::Rc; - -use super::session_cache::SessionInfo; - -struct FetchTask { +use std::collections::HashSet; + +use futures::channel::oneshot; + +use polkadot_primitives::v1::{ + BlakeTwo256, CoreState, ErasureChunk, Hash, HashT, + SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, CandidateHash, + CandidateDescriptor, OccupiedCore, +}; +use polkadot_subsystem::{ + jaeger, errors::{ChainApiError, RuntimeApiError}, PerLeafSpan, + ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, +}; +use polkadot_subsystem::messages::{ + AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage, + NetworkBridgeMessage, RuntimeApiMessage, RuntimeApiRequest, NetworkBridgeEvent +}; +use super::{session_cache::SessionInfo, LOG_TARGET}; + +pub struct FetchTask { /// For what relay parents this task is relevant. /// /// In other words, for which relay chain parents this candidate is considered live. @@ -50,9 +66,13 @@ enum FetchedState { } impl FetchTask { - /// Start fetching a chunk. - pub async fn start(ctx: &mut Context, leaf: Hash, core: OccupiedCore) -> Self { - } +// /// Start fetching a chunk. + // pub async fn start(ctx: &mut Context, leaf: Hash, core: OccupiedCore) -> Self + // where + // Context: SubsystemContext, + // { + // panic + // } /// Add the given leaf to the relay parents which are making this task relevant. 
pub fn add_leaf(&mut self, leaf: Hash) { @@ -73,7 +93,7 @@ impl FetchTask { /// /// That is, it is either canceled or succeeded fetching the chunk. pub fn is_finished(&self) -> bool { - match state { + match self.state { FetchedState::Fetched | FetchedState::Canceled => true, FetchedState::Fetching => false, } @@ -84,25 +104,3 @@ impl FetchTask { self.relay_parent } } - -/// Query the session index of a relay parent -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] -async fn query_session_index_for_child( - ctx: &mut Context, - relay_parent: Hash, -) -> Result -where - Context: SubsystemContext, -{ - let (tx, rx) = oneshot::channel(); - let query_session_idx_for_child = AllMessages::RuntimeApi(RuntimeApiMessage::Request( - relay_parent, - RuntimeApiRequest::SessionIndexForChild(tx), - )); - - ctx.send_message(query_session_idx_for_child) - .await; - rx.await - .map_err(|e| Error::QuerySessionResponseChannel(e))? - .map_err(|e| Error::QuerySession(e)) -} diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 7c7c4c9cc64c..c0792db5d722 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -15,6 +15,13 @@ // along with Polkadot. If not, see . +use sp_keystore::SyncCryptoStorePtr; + +use polkadot_subsystem::{ + jaeger, errors::{ChainApiError, RuntimeApiError}, PerLeafSpan, + ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, messages::AvailabilityDistributionMessage +}; + /// Error and [`Result`] type for this subsystem. mod error; pub use error::Error; @@ -34,6 +41,11 @@ mod session_cache; const LOG_TARGET: &'static str = "availability_distribution"; + +/// Availability Distribution metrics. +/// TODO: Dummy for now. +type Metrics = (); + /// The bitfield distribution subsystem. pub struct AvailabilityDistributionSubsystem { /// Pointer to a keystore, which is required for determining this nodes validator index. @@ -67,7 +79,10 @@ impl AvailabilityDistributionSubsystem { } /// Start processing work as passed on from the Overseer. - async fn run(self, ctx: Context, state: &mut ProtocolState) -> Result<()> { + async fn run(self, mut ctx: Context, state: &mut ProtocolState) -> Result<()> + where + Context: SubsystemContext + Sync + Send, + { loop { let message = ctx.recv().await?; match message { diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index a0c2acc0a84e..c5c34f35f002 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -14,19 +14,36 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
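The `live_in` bookkeeping that `FetchTask` performs above can be reduced to a few lines of plain `std` code. This is only an illustrative sketch with a hypothetical `Task` type and `u64` hashes, not the real subsystem types: a task stays relevant while at least one active leaf still references its candidate, and becomes removable once the last such leaf is deactivated.

```rust
use std::collections::HashSet;

// Hypothetical stand-in for a relay chain block hash.
type Hash = u64;

struct Task {
    /// Relay chain heads for which this candidate is still pending availability.
    live_in: HashSet<Hash>,
}

impl Task {
    fn add_leaf(&mut self, leaf: Hash) {
        self.live_in.insert(leaf);
    }

    /// Remove deactivated leaves; the task can be dropped once none remain.
    fn remove_leaves(&mut self, deactivated: &HashSet<Hash>) -> bool {
        self.live_in.retain(|leaf| !deactivated.contains(leaf));
        self.live_in.is_empty()
    }
}

fn main() {
    let mut task = Task { live_in: HashSet::new() };
    task.add_leaf(1);
    task.add_leaf(2);
    // Leaf 1 goes out of scope, but leaf 2 still needs the chunk:
    assert!(!task.remove_leaves(&[1].into_iter().collect()));
    // Once leaf 2 is gone as well, the task is obsolete and can be cleaned up:
    assert!(task.remove_leaves(&[2].into_iter().collect()));
}
```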
-use std::rc::Weak; +use std::collections::HashMap; +use std::rc::{Rc, Weak}; -use super::{LOG_TARGET, error::Result, Error}; +use rand::{seq::SliceRandom, thread_rng}; + +use sp_keystore::{CryptoStore, SyncCryptoStorePtr}; + +use super::{error::Result, Error, LOG_TARGET}; +use polkadot_node_subsystem_util::{ + request_session_index_for_child_ctx, request_validator_groups_ctx, request_validators_ctx, +}; +use polkadot_primitives::v1::{ + BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, + SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, +}; +use polkadot_subsystem::{ + errors::{ChainApiError, RuntimeApiError}, + jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, + Subsystem, SubsystemContext, SubsystemError, +}; /// Caching of session info as needed by availability distribution. /// /// It should be ensured that a cached session stays live in the cache as long as we might need it. /// A warning will be logged, if an already dead entry gets fetched. -struct SessionCache { +pub struct SessionCache { /// Maintain caches for session information for currently relay parents of interest. /// /// Fast path - if we have an entry here, no query to the runtime is necessary at all. - by_relay_parent: HashMap, + by_relay_parent: HashMap>, /// Look up cached sessions by SessionIndex. /// @@ -36,7 +53,10 @@ struct SessionCache { /// Note: Performance of fetching is really secondary here, but we need to ensure we are going /// to get any existing cache entry, before fetching new information, as we should not mess up /// the order of validators. - by_session_index: HashMap, + by_session_index: HashMap>, + + /// Key store for determining whether we are a validator and what `ValidatorIndex` we have. + keystore: SyncCryptoStorePtr, } /// Localized session information, tailored for the needs of availability distribution. @@ -48,27 +68,60 @@ pub struct SessionInfo { /// should arrive at a different order, therefore we distribute the load. pub validator_groups: Vec>, + /// All validators of that session. + /// + /// Needed for authority discovery and finding ourselves. + pub validators: Vec, + /// Information about ourself: pub our_index: ValidatorIndex, } impl SessionCache { - /// Retrieve session info for the given relay parent. /// /// This function will query the cache first and will only query the runtime on cache miss. - pub fn fetch_session_info(&mut self, ctx: &mut Context, relay_parent: Hash) -> Result> { - if let Some(info) = self.get_by_relay_parent(relay_parent) { - return Ok(info) + /// + /// Returns: `Ok(None)` in case this node is not a validator in the current session. + pub async fn fetch_session_info( + &mut self, + ctx: &mut Context, + parent: Hash, + ) -> Result>> + where + Context: SubsystemContext, + { + if let Some(info) = self.get_by_relay_parent(parent) { + return Ok(Some(info)); } - let session_index = request_session_index_for_child_ctx(parent, ctx).await + let session_index = request_session_index_for_child_ctx(parent, ctx) + .await? + .await .map_err(|e| Error::SessionCacheRuntimRequest(e))?; if let Some(info) = self.get_by_session_index(session_index) { - self.by_relay_parent.insert(relay_parent, info.downgrade); - return Ok(info); + self.by_relay_parent.insert(parent, info.downgrade()); + return Ok(Some(info)); } - + if let Some((our_index, validators)) = self.query_validator_info(ctx, parent).await? 
{ + let (mut validator_groups, _) = request_validator_groups_ctx(parent, ctx).await?.await?; + // Shuffle validators in groups: + let mut rng = thread_rng(); + for g in validator_groups.iter_mut() { + g.shuffle(&rng) + } + let info = Rc::new(SessionInfo { + validator_groups, + validators, + our_index, + }); + let downgraded = info.downgrade(); + self.by_relay_parent.insert(parent, downgraded); + self.get_by_session_index.insert(session_index, downgraded); + return Ok(Some(info)); + } + Ok(None) } + /// Get session info for a particular relay parent. /// /// Returns: None, if no entry for that relay parent exists in the cache (or it was dead @@ -79,21 +132,40 @@ impl SessionCache { } /// Get session info for a given `SessionIndex`. - fn get_by_session_index(&self, session_id: SessionId) -> Option> { - let weak_ref = self.by_session_id.get(session_id)?; + fn get_by_session_index(&self, session_index: SessionIndex) -> Option> { + let weak_ref = self.by_session_index.get(session_index)?; upgrade_report_dead(weak_ref) } + + /// Get our validator id and the validators in the current session. + /// + /// Returns: Ok(None) if we are not a validator. + async fn query_validator_info( + &self, + &ctx: &mut Context, + parent: Hash, + ) -> Result)>> { + let validators = request_validators_ctx(ctx, parent).await?.await?; + for (i, v) in validators.iter().enumerate() { + if CryptoStore::has_keys(&*self.keystore, &[(v.to_raw_vec(), ValidatorId::ID)]) + .await + { + return Ok(Some((i as ValidatorIndex, validators))); + } + } + Ok(None) + } } /// Upgrade a weak SessionInfo reference. /// /// Warn if it was dead already, as this should not happen. Cache should stay valid at least as /// long as we need it. -fn upgrade_report_dead(info: Weak>) -> Option> { - match weak_ref.upgrade() { +fn upgrade_report_dead(info: Weak) -> Option> { + match info.upgrade() { Some(info) => Some(info), None => { - tracing::warn!(LOG_TARGET, relay_parent, "A no longer cached session got requested, this should not happen in normal operation."); + tracing::warn!(LOG_TARGET, "A no longer cached session got requested, this should not happen in normal operation."); None } } diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index 7c6760b51862..f47db8c94efa 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -52,12 +52,33 @@ //! candidate/FetchTask around anymore which references it. Thus the cache simply consists of //! `Weak` pointers to the actual session infos and the `FetchTask`s keep `Rc`s, therefore we know //! exactly when we can get rid of a cache entry by means of the Weak pointer evaluating to `None`. 
-use itertools::{Itertools, Either} -use super::{Result, LOG_TARGET, session_cache::SessionCache}; +use std::collections::{ + hash_map::{Entry, HashMap}, + hash_set::HashSet, +}; +use std::iter::IntoIterator; +use std::sync::Arc; + +use futures::channel::oneshot; +use jaeger::JaegerSpan; + +use itertools::{Either, Itertools}; + +use super::{fetch_task::FetchTask, session_cache::SessionCache, Result, LOG_TARGET}; +use polkadot_primitives::v1::{ + BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, + OccupiedCore, SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, +}; +use polkadot_subsystem::{ + errors::{ChainApiError, RuntimeApiError}, + jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, + Subsystem, SubsystemContext, SubsystemError, messages::AvailabilityDistributionMessage, +}; +use polkadot_node_subsystem_util::request_availability_cores_ctx; /// A running instance of this subsystem. -struct ProtocolState { +pub struct ProtocolState { /// Candidates we need to fetch our chunk for. fetches: HashMap, @@ -67,7 +88,6 @@ struct ProtocolState { session_cache: SessionCache, } - struct ChunkFetchingInfo { descriptor: CandidateDescriptor, /// Validators that backed the candidate and hopefully have our chunk. @@ -78,7 +98,7 @@ impl ProtocolState { /// Update heads that need availability distribution. /// /// For all active heads we will be fetching our chunk for availabilty distribution. - pub(crate) fn update_fetching_heads( + pub(crate) fn update_fetching_heads( &mut self, ctx: &mut Context, update: ActiveLeavesUpdate, @@ -94,14 +114,17 @@ impl ProtocolState { } /// Start requesting chunks for newly imported heads. - fn start_requesting_chunks( + async fn start_requesting_chunks( &mut self, ctx: &mut Context, - new_heads: &SmallVec<[(Hash, Arc)]>, - ) -> Result<()> { + new_heads: impl Iterator)>, + ) -> Result<()> + where + Context: SubsystemContext + Sync + Send, + { for (leaf, _) in new_heads { let cores = query_occupied_cores(ctx, leaf).await?; - add_cores(cores)?; + self.add_cores(ctx, leaf, cores)?; } Ok(()) } @@ -110,10 +133,10 @@ impl ProtocolState { /// /// Returns relay_parents which became irrelevant for availability fetching (are not /// referenced by any candidate anymore). - fn stop_requesting_chunks( + fn stop_requesting_chunks( &mut self, ctx: &mut Context, - obsolete_leaves: &SmallVec<[(Hash, Arc)]>, + obsolete_leaves: impl Iterator)>, ) -> Result> { let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().map(|h| h.0).collect(); let (obsolete_parents, new_fetches): (HashSet<_>, HashMap<_>) = @@ -136,50 +159,35 @@ impl ProtocolState { /// Note: The passed in `leaf` is not the same as CandidateDescriptor::relay_parent in the /// given cores. The latter is the relay_parent this candidate considers its parent, while the /// passed in leaf might be some later block where the candidate is still pending availability. - fn add_cores( + fn add_cores( &mut self, ctx: &mut Context, leaf: Hash, - cores: impl IntoIter, + cores: impl IntoIterator, ) { for core in cores { match self.fetches.entry(core.candidate_hash) { Entry::Occupied(e) => // Just book keeping - we are already requesting that chunk: - e.relay_parents.insert(leaf), - Entry::Vacant(e) => { - e.insert(FetchTask::start(ctx, leaf, core)) + { + e.relay_parents.insert(leaf) } + Entry::Vacant(e) => e.insert(FetchTask::start(ctx, leaf, core)), } } } } -/// Start requesting our chunk for the given core. 
-fn start_request_chunk(core: OccupiedCore) -> FetchTask { - panic!("TODO: To be implemented!"); -} - -/// Query all hashes and descriptors of candidates pending availability at a particular block. -#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] +///// Query all hashes and descriptors of candidates pending availability at a particular block. +// #[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] async fn query_occupied_cores( ctx: &mut Context, relay_parent: Hash, ) -> Result> where - Context: SubsystemContext, + Context: SubsystemContext, { - let (tx, rx) = oneshot::channel(); - ctx.send_message(AllMessages::RuntimeApi(RuntimeApiMessage::Request( - relay_parent, - RuntimeApiRequest::AvailabilityCores(tx), - ))) - .await; - - let cores: Vec<_> = rx - .await - .map_err(|e| Error::AvailabilityCoresResponseChannel(e))? - .map_err(|e| Error::AvailabilityCores(e))?; + let cores = request_availability_cores_ctx(relay_parent, ctx).await?.await; Ok(cores .into_iter() diff --git a/node/subsystem-util/src/lib.rs b/node/subsystem-util/src/lib.rs index 1eee4cc7f758..ace6f90d80bc 100644 --- a/node/subsystem-util/src/lib.rs +++ b/node/subsystem-util/src/lib.rs @@ -321,7 +321,7 @@ impl Validator { .iter() .enumerate() .find(|(_, k)| k == &&key) - .map(|(idx, _)| idx as ValidatorIndex) + .map(|(idx, _)| ValidatorIndex(idx as u32)) .expect("signing_key would have already returned NotAValidator if the item we're searching for isn't in this list; qed"); Ok(Validator { diff --git a/primitives/src/v0.rs b/primitives/src/v0.rs index 951624566172..c8ea0fd6a014 100644 --- a/primitives/src/v0.rs +++ b/primitives/src/v0.rs @@ -114,8 +114,8 @@ impl MallocSizeOf for ValidatorId { } /// Index of the validator is used as a lightweight replacement of the `ValidatorId` when appropriate. -#[derive(Debug, Eq, Ord, PartialEq, PartialOrd, Hash)] -pub struct ValidatorIndex(u32); +#[derive(Debug, Eq, Ord, PartialEq, PartialOrd, Hash, Copy, Clone, Encode, Decode, MallocSizeOf)] +pub struct ValidatorIndex(pub u32); application_crypto::with_pair! { /// A Parachain validator keypair. From ef84ea5a447debadbdde2d41048ef304e1ecf6f6 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 12 Feb 2021 10:51:50 +0100 Subject: [PATCH 06/60] Only derive MallocSizeOf and Debug for std. --- primitives/src/v0.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/primitives/src/v0.rs b/primitives/src/v0.rs index c8ea0fd6a014..8c6b4f538a54 100644 --- a/primitives/src/v0.rs +++ b/primitives/src/v0.rs @@ -114,7 +114,9 @@ impl MallocSizeOf for ValidatorId { } /// Index of the validator is used as a lightweight replacement of the `ValidatorId` when appropriate. -#[derive(Debug, Eq, Ord, PartialEq, PartialOrd, Hash, Copy, Clone, Encode, Decode, MallocSizeOf)] +#[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Copy, Clone, Encode, Decode)] +#[cfg(feature = "std")] +#[derive(Debug, MallocSizeOf)] pub struct ValidatorIndex(pub u32); application_crypto::with_pair! { From 1e3580413a35f99d403cbb36dc7f8d979e7d3f46 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 12 Feb 2021 13:59:13 +0100 Subject: [PATCH 07/60] availability-distribution: Cache feature complete. 
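The load-balancing rationale behind the group shuffling implemented in this patch (every node derives its own request order, so chunk requests spread across the whole backing group) can be demonstrated with the `rand` 0.8 crate added earlier in the series. This is only an illustrative sketch, with plain integers standing in for validator indices; run on two different nodes it would, with high probability, produce two different orders.

```rust
// Assumes rand = "0.8" (added to Cargo.toml earlier in this patch series).
use rand::{seq::SliceRandom, thread_rng};

fn main() {
    // Two hypothetical backing groups, as delivered by the runtime.
    let mut validator_groups: Vec<Vec<u32>> = vec![vec![0, 1, 2, 3], vec![4, 5, 6, 7]];

    // Shuffle each group once per session; every node ends up with its own order,
    // so the first validator asked for a chunk differs from node to node.
    let mut rng = thread_rng();
    for group in validator_groups.iter_mut() {
        group.shuffle(&mut rng);
    }

    // Requests would then be tried in this per-node order:
    println!("{:?}", validator_groups);
}
```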
--- .../availability-distribution/Cargo.toml | 3 +- .../src/session_cache.rs | 62 +++++++++++++------ 2 files changed, 44 insertions(+), 21 deletions(-) diff --git a/node/network/availability-distribution/Cargo.toml b/node/network/availability-distribution/Cargo.toml index 1835078a3683..0ac0ed535418 100644 --- a/node/network/availability-distribution/Cargo.toml +++ b/node/network/availability-distribution/Cargo.toml @@ -15,15 +15,16 @@ polkadot-subsystem = { package = "polkadot-node-subsystem", path = "../../subsys polkadot-node-network-protocol = { path = "../../network/protocol" } polkadot-node-subsystem-util = { path = "../../subsystem-util" } polkadot-node-core-runtime-api = { path = "../../core/runtime-api" } +sp-application-crypto = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", features = ["std"] } sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } thiserror = "1.0.23" +itertools = "0.10.0" rand = "0.8.3" [dev-dependencies] polkadot-subsystem-testhelpers = { package = "polkadot-node-subsystem-test-helpers", path = "../../subsystem-test-helpers" } sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", features = ["std"] } -sp-application-crypto = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-keyring = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" } sc-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index c5c34f35f002..0f12ee66173c 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -19,9 +19,10 @@ use std::rc::{Rc, Weak}; use rand::{seq::SliceRandom, thread_rng}; +use sp_application_crypto::AppKey; +use sp_core::crypto::Public; use sp_keystore::{CryptoStore, SyncCryptoStorePtr}; -use super::{error::Result, Error, LOG_TARGET}; use polkadot_node_subsystem_util::{ request_session_index_for_child_ctx, request_validator_groups_ctx, request_validators_ctx, }; @@ -30,11 +31,15 @@ use polkadot_primitives::v1::{ SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, }; use polkadot_subsystem::{ - errors::{ChainApiError, RuntimeApiError}, jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, }; +use super::{ + error::{recv_runtime, Result}, + Error, LOG_TARGET, +}; + /// Caching of session info as needed by availability distribution. /// /// It should be ensured that a cached session stays live in the cache as long as we might need it. @@ -94,29 +99,35 @@ impl SessionCache { if let Some(info) = self.get_by_relay_parent(parent) { return Ok(Some(info)); } - let session_index = request_session_index_for_child_ctx(parent, ctx) - .await? 
- .await - .map_err(|e| Error::SessionCacheRuntimRequest(e))?; + let session_index = + recv_runtime(request_session_index_for_child_ctx(parent, ctx).await).await?; if let Some(info) = self.get_by_session_index(session_index) { - self.by_relay_parent.insert(parent, info.downgrade()); + self.by_relay_parent.insert(parent, Rc::downgrade(&info)); return Ok(Some(info)); } + + // About to fetch new stuff, time to get rid of dead bodies: We keep relay_parent to + // session info matches way longer than necessary (for an entire session), but the overhead + // should be low enough to not matter. + self.bury_dead(); if let Some((our_index, validators)) = self.query_validator_info(ctx, parent).await? { - let (mut validator_groups, _) = request_validator_groups_ctx(parent, ctx).await?.await?; + let (mut validator_groups, _) = + recv_runtime(request_validator_groups_ctx(parent, ctx).await).await?; + // Shuffle validators in groups: let mut rng = thread_rng(); for g in validator_groups.iter_mut() { - g.shuffle(&rng) + g.shuffle(&mut rng) } + let info = Rc::new(SessionInfo { validator_groups, validators, our_index, }); - let downgraded = info.downgrade(); - self.by_relay_parent.insert(parent, downgraded); - self.get_by_session_index.insert(session_index, downgraded); + let downgraded = Rc::downgrade(&info); + self.by_relay_parent.insert(parent, downgraded.clone()); + self.by_session_index.insert(session_index, downgraded); return Ok(Some(info)); } Ok(None) @@ -126,14 +137,14 @@ impl SessionCache { /// /// Returns: None, if no entry for that relay parent exists in the cache (or it was dead /// already - which should not happen.) - fn get_by_relay_parent(&self, relay_parent: Hash) -> Option> { - let weak_ref = self.by_relay_parent.get(relay_parent)?; + fn get_by_relay_parent(&self, parent: Hash) -> Option> { + let weak_ref = self.by_relay_parent.get(&parent)?; upgrade_report_dead(weak_ref) } /// Get session info for a given `SessionIndex`. fn get_by_session_index(&self, session_index: SessionIndex) -> Option> { - let weak_ref = self.by_session_index.get(session_index)?; + let weak_ref = self.by_session_index.get(&session_index)?; upgrade_report_dead(weak_ref) } @@ -142,26 +153,37 @@ impl SessionCache { /// Returns: Ok(None) if we are not a validator. async fn query_validator_info( &self, - &ctx: &mut Context, + ctx: &mut Context, parent: Hash, - ) -> Result)>> { - let validators = request_validators_ctx(ctx, parent).await?.await?; + ) -> Result)>> + where + Context: SubsystemContext, + { + let validators = recv_runtime(request_validators_ctx(parent, ctx).await).await?; for (i, v) in validators.iter().enumerate() { if CryptoStore::has_keys(&*self.keystore, &[(v.to_raw_vec(), ValidatorId::ID)]) .await { - return Ok(Some((i as ValidatorIndex, validators))); + return Ok(Some((ValidatorIndex(i as u32), validators))); } } Ok(None) } + + /// Get rid of the dead bodies from time to time. + fn bury_dead(&mut self) { + self.by_session_index + .retain(|_, info| info.upgrade().is_some()); + self.by_relay_parent + .retain(|_, info| info.upgrade().is_some()); + } } /// Upgrade a weak SessionInfo reference. /// /// Warn if it was dead already, as this should not happen. Cache should stay valid at least as /// long as we need it. 
-fn upgrade_report_dead(info: Weak) -> Option> { +fn upgrade_report_dead(info: &Weak) -> Option> { match info.upgrade() { Some(info) => Some(info), None => { From d8fda81ec6726172da3a72b6b0fb1b33539f9b22 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Sat, 13 Feb 2021 09:59:15 +0100 Subject: [PATCH 08/60] Sketch out logic in `FetchTask` for actual fetching. - Compile fixes. - Cleanup. --- .../availability-distribution/src/error.rs | 32 ++- .../src/fetch_task.rs | 216 +++++++++++++++--- .../src/session_cache.rs | 7 +- .../availability-distribution/src/state.rs | 51 +++-- 4 files changed, 247 insertions(+), 59 deletions(-) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index 68551dbad62f..0e29294b2c0c 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -20,7 +20,10 @@ use thiserror::Error; use futures::channel::oneshot; use polkadot_node_subsystem_util::Error as UtilError; -use polkadot_subsystem::{SubsystemError}; +use polkadot_subsystem::{ + errors::{ChainApiError, RuntimeApiError}, + SubsystemError, +}; #[derive(Debug, Error)] pub enum Error { @@ -36,9 +39,17 @@ pub enum Error { #[error("Receive channel closed")] IncomingMessageChannel(#[source] SubsystemError), - /// Some request to the runtime in the session cache failed. - #[error("Session cache runtime request failed")] - SessionCacheRuntimRequest(#[source] UtilError), + /// Some request to utility functions failed. + #[error("Runtime request failed")] + UtilRequest(#[source] UtilError), + + /// Some request to the runtime failed. + #[error("Runtime request failed")] + RuntimeRequestCanceled(#[source] oneshot::Canceled), + + /// Some request to the runtime failed. + #[error("Runtime request failed")] + RuntimeRequest(#[source] RuntimeApiError), } pub type Result = std::result::Result; @@ -48,3 +59,16 @@ impl From for Error { Self::IncomingMessageChannel(err) } } + +/// Receive a response from a runtime request and convert errors. +pub(crate) async fn recv_runtime( + r: std::result::Result< + oneshot::Receiver>, + UtilError, + >, +) -> Result { + r.map_err(Error::UtilRequest)? + .await + .map_err(Error::RuntimeRequestCanceled)? + .map_err(Error::RuntimeRequest) +} diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index d84d24001aa9..e1c8409db625 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -14,25 +14,27 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
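The `recv_runtime` helper introduced above collapses three failure layers (failing to send the request, the response channel being dropped, and the runtime API itself erroring) into the module's single `Error` type. Below is a simplified sketch of that flattening, using placeholder error payloads instead of the real `UtilError`, `oneshot::Canceled` and `RuntimeApiError`.

```rust
// Placeholder error types standing in for UtilError, oneshot::Canceled and RuntimeApiError.
#[derive(Debug)]
enum Error {
    UtilRequest(String),
    RuntimeRequestCanceled,
    RuntimeRequest(String),
}

type Result<T> = std::result::Result<T, Error>;

/// The three nested layers a runtime request can fail at, collapsed into one `Result`.
fn flatten<T>(
    r: std::result::Result<std::result::Result<std::result::Result<T, String>, ()>, String>,
) -> Result<T> {
    r.map_err(Error::UtilRequest)? // failed to even send the request
        .map_err(|()| Error::RuntimeRequestCanceled)? // response channel dropped
        .map_err(Error::RuntimeRequest) // runtime API reported an error
}

fn main() {
    assert!(matches!(flatten(Ok(Ok(Ok(42u32)))), Ok(42)));
    assert!(matches!(flatten::<u32>(Ok(Err(()))), Err(Error::RuntimeRequestCanceled)));
}
```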
-use std::rc::Rc; use std::collections::HashSet; +use std::rc::Rc; use futures::channel::oneshot; +use v1::AvailabilityFetchingResponse; +use super::{session_cache::SessionInfo, LOG_TARGET}; +use polkadot_node_network_protocol::request_response::v1; use polkadot_primitives::v1::{ - BlakeTwo256, CoreState, ErasureChunk, Hash, HashT, - SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, CandidateHash, - CandidateDescriptor, OccupiedCore, -}; -use polkadot_subsystem::{ - jaeger, errors::{ChainApiError, RuntimeApiError}, PerLeafSpan, - ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, + BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, + OccupiedCore, SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, }; use polkadot_subsystem::messages::{ AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage, - NetworkBridgeMessage, RuntimeApiMessage, RuntimeApiRequest, NetworkBridgeEvent + NetworkBridgeEvent, NetworkBridgeMessage, RuntimeApiMessage, RuntimeApiRequest, +}; +use polkadot_subsystem::{ + errors::{ChainApiError, RuntimeApiError}, + jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, + Subsystem, SubsystemContext, SubsystemError, }; -use super::{session_cache::SessionInfo, LOG_TARGET}; pub struct FetchTask { /// For what relay parents this task is relevant. @@ -42,37 +44,109 @@ pub struct FetchTask { /// stop keeping track of that candidate/chunk. live_in: HashSet, - /// The relay parent providing the context for the candidate. - relay_parent: Hash, - - /// Some details about the to be fetched candidate. - descriptor: CandidateDescriptor, - /// We keep the task around in state `Fetched` until `live_in` becomes empty, to make /// sure we won't re-fetch an already fetched candidate. state: FetchedState, - session: Rc + /// Session information. + session: Rc, } /// State of a particular candidate chunk fetching process. enum FetchedState { /// Chunk is currently being fetched. - Fetching, + /// + /// Once the contained `Sender` is dropped, any still running task will be canceled. + Fetching(oneshot::Sender<()>), /// Chunk has already been fetched successfully. Fetched, /// All relevant live_in have been removed, before we were able to get our chunk. Canceled, } +/// Messages sent from `FetchTask`s to be handled/forwarded. +pub enum FromFetchTask { + /// Message to other subsystem. + Message(AllMessages), + + /// Concluded with result. + /// + /// In case of `None` everything was fine, in case of `Some` some validators in the group + /// did not serve us our chunk as expected. + Concluded(Option), +} + +/// Report of bad validators. +pub struct BadValidators { + /// The session index that was used. + pub session_index: SessionIndex, + /// The group the not properly responding validators are. + pub group_index: GroupIndex, + /// The indeces of the bad validators. + pub bad_validators: Vec, +} + +/// Information a running task needs. +struct RunningTask { + /// For what session we have been spawned. + session_index: SessionIndex, + + /// Index of validator group. + group_index: GroupIndex, + + /// Validators to request the chunk from. + group: Vec, + + /// The request to send. + request: v1::AvailabilityFetchingRequest, + + /// Root hash, for verifying the chunks validity. + erasure_root: Hash, + + /// Relay parent of the candidate to fetch. 
+ relay_parent: Hash, + + /// Sender for communicating with other subsystems and reporting results. + sender: mpsc::Sender, + + /// Receive `Canceled` errors here. + receiver: oneshot::Receiver<()>, +} + impl FetchTask { -// /// Start fetching a chunk. - // pub async fn start(ctx: &mut Context, leaf: Hash, core: OccupiedCore) -> Self - // where - // Context: SubsystemContext, - // { - // panic - // } + /// Start fetching a chunk. + pub async fn start( + ctx: &mut Context, + leaf: Hash, + core: OccupiedCore, + session_info: Rc, + sender: mpsc::Sender, + ) -> Self + where + Context: SubsystemContext, + { + let (handle, receiver) = oneshot::channel(); + let running = RunningTask { + session_index: session_info.session_index, + group_index: core.group_responsible, + group: session_info.validator_groups.get(core.group_responsible).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), + request: v1::AvailabilityFetchingRequest { + candidate_hash: core.candidate_hash, + index: session_info.our_index, + }, + erasure_root: core.candidate_descriptor.erasure_root, + relay_parent: core.candidate_descriptor.relay_parent, + sender, + receiver, + }; + ctx.spawn("chunk-fetcher", Pin::new(Box::new(running.run()))) + .await?; + FetchTask { + live_in: HashSet::from(leaf), + state: FetchedState::Fetching(handle), + session: session_info, + } + } /// Add the given leaf to the relay parents which are making this task relevant. pub fn add_leaf(&mut self, leaf: Hash) { @@ -101,6 +175,96 @@ impl FetchTask { /// Retrieve the relay parent providing the context for this candidate. pub fn get_relay_parent(&self) -> Hash { - self.relay_parent + self.descriptor.relay_parent + } +} + +/// Things that can go wrong in task execution. +#[derive(Debug)] +enum TaskError { + /// The peer failed to deliver a correct chunk for some reason (has been reported as + /// appropriate). + PeerError, + /// This very node is seemingly shutting down (sending of message failed). + ShuttingDown, +} + +type Result = std::result::Result; + +impl RunningTask { + async fn run(self) { + let bad_validators = Vec::new(); + // Try validators in order: + for index in self.group { + + // Send request: + let resp = match do_request(index).await { + Ok(resp) => resp, + Err(TaskError::ShuttingDown) => { + tracking::info("Node seems to be shutting down, canceling fetch task"); + return; + } + Err(TaskError::PeerError) => { + bad_validators.push(index); + continue + } + }; + + // Data valid? + if !self.validate_response(&resp) { + bad_validators.push(index); + continue + } + + // Ok, let's store it and be happy. + store_response(resp); + break; + } + conclude(bad_validators); + } + + /// Do request and return response, if successful. + /// + /// Will also report peer if not successful. 
+ async fn do_request(&self, validator: ValidatorIndex) -> std::result::Result { + let peer = self.get_peer_id(index)?; + let (full_request, response_recv) = + Requests::AvailabilityFetching(OutgoingRequest::new(peer, self.request)); + + self.sender.send(FromFetchTask::Message( + AllMessages::NetworkBridgeMessage::SendRequests(Vec::from(full_request)), + )).await.map_err(|| TaskError::ShuttingDown)?; + + match response_recv.await { + Ok(resp) => Some(resp), + Err(RequestError::InvalidResponse(err)) => { + }, + Err(RequestError::NetworkError(err)) => { + } + Err(RequestError::Canceled(err)) => { + } + } + Err(PeerError) + } + + fn get_peer_id(index: ValidatorIndex) -> Result { + panic!("TO BE IMPLEMENTED"); + } + + /// Tell subsystem we are done. + async fn conclude(&self, bad_validators: Vec) { + let payload = if bad_validators.is_empty() { + None + } + else { + Some(BadValidators { + session_index: self.session_index, + group_index: self.group_index, + bad_validators, + }) + }; + if let Err(err) = self.sender.send(FromFetchTask::Concluded(payload)).await { + tracing::warn!(LOG_TARGET, err: ?err, "Sending concluded message for task failed"); + } } } diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 0f12ee66173c..043022f31fcc 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -66,6 +66,8 @@ pub struct SessionCache { /// Localized session information, tailored for the needs of availability distribution. pub struct SessionInfo { + /// The index of this session. + pub session_index: SessionIndex, /// Validator groups of the current session. /// /// Each group's order is randomized. This way we achieve load balancing when requesting @@ -73,11 +75,6 @@ pub struct SessionInfo { /// should arrive at a different order, therefore we distribute the load. pub validator_groups: Vec>, - /// All validators of that session. - /// - /// Needed for authority discovery and finding ourselves. - pub validators: Vec, - /// Information about ourself: pub our_index: ValidatorIndex, } diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index f47db8c94efa..c12a94dcb948 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -88,12 +88,6 @@ pub struct ProtocolState { session_cache: SessionCache, } -struct ChunkFetchingInfo { - descriptor: CandidateDescriptor, - /// Validators that backed the candidate and hopefully have our chunk. - backing_group: Vec, -} - impl ProtocolState { /// Update heads that need availability distribution. /// @@ -102,15 +96,18 @@ impl ProtocolState { &mut self, ctx: &mut Context, update: ActiveLeavesUpdate, - ) -> Result<()> { + ) -> Result<()> + where + Context: SubsystemContext, + { let ActiveLeavesUpdate { activated, deactivated, } = update; // Order important! We need to handle activated, prior to deactivated, otherwise we might // cancel still needed jobs. - self.start_requesting_chunks(ctx, activated)?; - let dead_parents = self.stop_requesting_chunks(ctx, deactivated)?; + self.start_requesting_chunks(ctx, activated).await?; + self.stop_requesting_chunks(ctx, deactivated)?; } /// Start requesting chunks for newly imported heads. 
@@ -124,7 +121,7 @@ impl ProtocolState { { for (leaf, _) in new_heads { let cores = query_occupied_cores(ctx, leaf).await?; - self.add_cores(ctx, leaf, cores)?; + self.add_cores(ctx, leaf, cores).await?; } Ok(()) } @@ -133,23 +130,22 @@ impl ProtocolState { /// /// Returns relay_parents which became irrelevant for availability fetching (are not /// referenced by any candidate anymore). - fn stop_requesting_chunks( + fn stop_requesting_chunks( &mut self, - ctx: &mut Context, obsolete_leaves: impl Iterator)>, ) -> Result> { let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().map(|h| h.0).collect(); - let (obsolete_parents, new_fetches): (HashSet<_>, HashMap<_>) = - self.fetches.into_iter().partition_map(|(c_hash, task)| { + let new_fetches = + self.fetches.into_iter().filter_map(|(c_hash, task)| { task.remove_leaves(HashSet::from(obsolete_leaves)); if task.is_finished() { - Either::Left(task.get_relay_parent()) - } else { - Either::Right((c_hash, task)) + Some(task.get_relay_parent()) + } + else { + None } - }); + }).collect(); self.fetches = new_fetches; - obsolete_parents } /// Add candidates corresponding for a particular relay parent. @@ -164,15 +160,22 @@ impl ProtocolState { ctx: &mut Context, leaf: Hash, cores: impl IntoIterator, - ) { + ) + where + Context: SubsystemContext, + { for core in cores { match self.fetches.entry(core.candidate_hash) { Entry::Occupied(e) => // Just book keeping - we are already requesting that chunk: - { - e.relay_parents.insert(leaf) + e.get_mut().add_leaf(leaf), + Entry::Vacant(e) => { + let session_info = self.session_cache.fetch_session_info(ctx, core.candidate_descriptor.relay_parent)?; + if let Some(session_info) = session_info { + e.insert(FetchTask::start(ctx, leaf, core, session_info)) + } + // Not a validator, nothing to do. } - Entry::Vacant(e) => e.insert(FetchTask::start(ctx, leaf, core)), } } } @@ -187,7 +190,7 @@ async fn query_occupied_cores( where Context: SubsystemContext, { - let cores = request_availability_cores_ctx(relay_parent, ctx).await?.await; + let cores = recv_runtime(request_availability_cores_ctx(relay_parent, ctx).await).await?; Ok(cores .into_iter() From 47036c9840ca67c3bafb9d1aacdc50e53c99da74 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Sat, 13 Feb 2021 10:03:53 +0100 Subject: [PATCH 09/60] Format cleanup. --- .../src/fetch_task.rs | 39 +++++++++++-------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index e1c8409db625..2be986a7d5c7 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -196,7 +196,6 @@ impl RunningTask { let bad_validators = Vec::new(); // Try validators in order: for index in self.group { - // Send request: let resp = match do_request(index).await { Ok(resp) => resp, @@ -206,14 +205,14 @@ impl RunningTask { } Err(TaskError::PeerError) => { bad_validators.push(index); - continue + continue; } }; // Data valid? if !self.validate_response(&resp) { bad_validators.push(index); - continue + continue; } // Ok, let's store it and be happy. @@ -226,23 +225,26 @@ impl RunningTask { /// Do request and return response, if successful. /// /// Will also report peer if not successful. 
- async fn do_request(&self, validator: ValidatorIndex) -> std::result::Result { + async fn do_request( + &self, + validator: ValidatorIndex, + ) -> std::result::Result { let peer = self.get_peer_id(index)?; let (full_request, response_recv) = Requests::AvailabilityFetching(OutgoingRequest::new(peer, self.request)); - self.sender.send(FromFetchTask::Message( - AllMessages::NetworkBridgeMessage::SendRequests(Vec::from(full_request)), - )).await.map_err(|| TaskError::ShuttingDown)?; + self.sender + .send(FromFetchTask::Message( + AllMessages::NetworkBridgeMessage::SendRequests(Vec::from(full_request)), + )) + .await + .map_err(|| TaskError::ShuttingDown)?; match response_recv.await { Ok(resp) => Some(resp), - Err(RequestError::InvalidResponse(err)) => { - }, - Err(RequestError::NetworkError(err)) => { - } - Err(RequestError::Canceled(err)) => { - } + Err(RequestError::InvalidResponse(err)) => {} + Err(RequestError::NetworkError(err)) => {} + Err(RequestError::Canceled(err)) => {} } Err(PeerError) } @@ -255,16 +257,19 @@ impl RunningTask { async fn conclude(&self, bad_validators: Vec) { let payload = if bad_validators.is_empty() { None - } - else { + } else { Some(BadValidators { session_index: self.session_index, group_index: self.group_index, bad_validators, }) }; - if let Err(err) = self.sender.send(FromFetchTask::Concluded(payload)).await { - tracing::warn!(LOG_TARGET, err: ?err, "Sending concluded message for task failed"); + if let Err(err) = self.sender.send(FromFetchTask::Concluded(payload)).await { + tracing::warn!( + LOG_TARGET, + err: ?err, + "Sending concluded message for task failed" + ); } } } From 4ad902fd985535b13bc3ffb34b9523f560f4e2f2 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Sat, 13 Feb 2021 10:07:54 +0100 Subject: [PATCH 10/60] More format fixes. --- .../src/fetch_task.rs | 1 + .../availability-distribution/src/state.rs | 33 +++++++++++-------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index 2be986a7d5c7..c2421c0638c4 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -240,6 +240,7 @@ impl RunningTask { .await .map_err(|| TaskError::ShuttingDown)?; + // TODO: Also handle receiver cancel. 
match response_recv.await { Ok(resp) => Some(resp), Err(RequestError::InvalidResponse(err)) => {} diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index c12a94dcb948..2334902af070 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -66,16 +66,18 @@ use jaeger::JaegerSpan; use itertools::{Either, Itertools}; use super::{fetch_task::FetchTask, session_cache::SessionCache, Result, LOG_TARGET}; +use polkadot_node_subsystem_util::request_availability_cores_ctx; use polkadot_primitives::v1::{ BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, OccupiedCore, SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, }; use polkadot_subsystem::{ errors::{ChainApiError, RuntimeApiError}, - jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, - Subsystem, SubsystemContext, SubsystemError, messages::AvailabilityDistributionMessage, + jaeger, + messages::AvailabilityDistributionMessage, + ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, Subsystem, + SubsystemContext, SubsystemError, }; -use polkadot_node_subsystem_util::request_availability_cores_ctx; /// A running instance of this subsystem. pub struct ProtocolState { @@ -96,7 +98,7 @@ impl ProtocolState { &mut self, ctx: &mut Context, update: ActiveLeavesUpdate, - ) -> Result<()> + ) -> Result<()> where Context: SubsystemContext, { @@ -135,16 +137,18 @@ impl ProtocolState { obsolete_leaves: impl Iterator)>, ) -> Result> { let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().map(|h| h.0).collect(); - let new_fetches = - self.fetches.into_iter().filter_map(|(c_hash, task)| { + let new_fetches = self + .fetches + .into_iter() + .filter_map(|(c_hash, task)| { task.remove_leaves(HashSet::from(obsolete_leaves)); if task.is_finished() { Some(task.get_relay_parent()) - } - else { + } else { None } - }).collect(); + }) + .collect(); self.fetches = new_fetches; } @@ -160,17 +164,20 @@ impl ProtocolState { ctx: &mut Context, leaf: Hash, cores: impl IntoIterator, - ) - where + ) where Context: SubsystemContext, { for core in cores { match self.fetches.entry(core.candidate_hash) { Entry::Occupied(e) => // Just book keeping - we are already requesting that chunk: - e.get_mut().add_leaf(leaf), + { + e.get_mut().add_leaf(leaf) + } Entry::Vacant(e) => { - let session_info = self.session_cache.fetch_session_info(ctx, core.candidate_descriptor.relay_parent)?; + let session_info = self + .session_cache + .fetch_session_info(ctx, core.candidate_descriptor.relay_parent)?; if let Some(session_info) = session_info { e.insert(FetchTask::start(ctx, leaf, core, session_info)) } From fee9476e5665887b89e02672aae5da40a4eede6b Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 15 Feb 2021 15:08:06 +0100 Subject: [PATCH 11/60] Almost feature complete `fetch_task`. Missing: - Check for cancel - Actual querying of peer ids. 
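One possible shape for the missing cancel check (sketch only; it assumes the
`kill: oneshot::Receiver<()>` handle handed out by `FetchTask::start` and a
`run_inner` helper containing the actual fetching loop):

    async fn run(self, kill: oneshot::Receiver<()>) {
        // Run the fetch logic, but stop as soon as the owning `FetchTask`
        // drops its sender half, i.e. the task got canceled.
        let run_it = self.run_inner();
        futures::pin_mut!(run_it);
        let _ = futures::future::select(run_it, kill).await;
    }

Querying of peer ids is left untouched by this sketch.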
--- .../availability-distribution/Cargo.toml | 1 + .../src/fetch_task.rs | 146 +++++++++++++----- 2 files changed, 110 insertions(+), 37 deletions(-) diff --git a/node/network/availability-distribution/Cargo.toml b/node/network/availability-distribution/Cargo.toml index 0ac0ed535418..03cd654d6f0a 100644 --- a/node/network/availability-distribution/Cargo.toml +++ b/node/network/availability-distribution/Cargo.toml @@ -28,5 +28,6 @@ sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", sp-keyring = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" } sc-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } +sc-network = { git = "https://github.com/paritytech/substrate", branch = "master" } assert_matches = "1.4.0" maplit = "1.0" diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index c2421c0638c4..22558fea2e5d 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -15,16 +15,22 @@ // along with Polkadot. If not, see . use std::collections::HashSet; +use std::pin::Pin; use std::rc::Rc; +use futures::channel::mpsc; use futures::channel::oneshot; -use v1::AvailabilityFetchingResponse; -use super::{session_cache::SessionInfo, LOG_TARGET}; -use polkadot_node_network_protocol::request_response::v1; +use sc_network::PeerId; + +use polkadot_erasure_coding::branch_hash; +use polkadot_node_network_protocol::request_response::{ + request::{OutgoingRequest, RequestError, Requests}, + v1::{AvailabilityFetchingRequest, AvailabilityFetchingResponse}, +}; use polkadot_primitives::v1::{ - BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, - OccupiedCore, SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, + BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, GroupIndex, Hash, + HashT, OccupiedCore, SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, }; use polkadot_subsystem::messages::{ AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage, @@ -36,6 +42,8 @@ use polkadot_subsystem::{ Subsystem, SubsystemContext, SubsystemError, }; +use super::{session_cache::SessionInfo, LOG_TARGET}; + pub struct FetchTask { /// For what relay parents this task is relevant. /// @@ -54,12 +62,10 @@ pub struct FetchTask { /// State of a particular candidate chunk fetching process. enum FetchedState { - /// Chunk is currently being fetched. + /// Chunk fetch has started. /// /// Once the contained `Sender` is dropped, any still running task will be canceled. - Fetching(oneshot::Sender<()>), - /// Chunk has already been fetched successfully. - Fetched, + Started(oneshot::Sender<()>), /// All relevant live_in have been removed, before we were able to get our chunk. Canceled, } @@ -98,7 +104,7 @@ struct RunningTask { group: Vec, /// The request to send. - request: v1::AvailabilityFetchingRequest, + request: AvailabilityFetchingRequest, /// Root hash, for verifying the chunks validity. erasure_root: Hash, @@ -106,6 +112,9 @@ struct RunningTask { /// Relay parent of the candidate to fetch. relay_parent: Hash, + /// Hash of the candidate we are fetching our chunk for. 
+ candidate_hash: CandidateHash, + /// Sender for communicating with other subsystems and reporting results. sender: mpsc::Sender, @@ -130,12 +139,13 @@ impl FetchTask { session_index: session_info.session_index, group_index: core.group_responsible, group: session_info.validator_groups.get(core.group_responsible).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), - request: v1::AvailabilityFetchingRequest { + request: AvailabilityFetchingRequest { candidate_hash: core.candidate_hash, index: session_info.our_index, }, erasure_root: core.candidate_descriptor.erasure_root, relay_parent: core.candidate_descriptor.relay_parent, + candidate_hash: core.candidate_hash, sender, receiver, }; @@ -143,7 +153,7 @@ impl FetchTask { .await?; FetchTask { live_in: HashSet::from(leaf), - state: FetchedState::Fetching(handle), + state: FetchedState::Started(handle), session: session_info, } } @@ -165,11 +175,11 @@ impl FetchTask { /// Whether or not this task can be considered finished. /// - /// That is, it is either canceled or succeeded fetching the chunk. + /// That is, it is either canceled, succeeded or failed. pub fn is_finished(&self) -> bool { match self.state { - FetchedState::Fetched | FetchedState::Canceled => true, - FetchedState::Fetching => false, + FetchedState::Canceled => true, + FetchedState::Started(sender) => sender.is_canceled(), } } @@ -192,15 +202,22 @@ enum TaskError { type Result = std::result::Result; impl RunningTask { + /// Fetch and store chunk. + /// + /// Try validators in backing group in order. async fn run(self) { let bad_validators = Vec::new(); // Try validators in order: for index in self.group { // Send request: - let resp = match do_request(index).await { + let peer_id = self.get_peer_id(index)?; + let resp = match self.do_request(peer_id).await { Ok(resp) => resp, Err(TaskError::ShuttingDown) => { - tracking::info("Node seems to be shutting down, canceling fetch task"); + tracing::info!( + target: LOG_TARGET, + "Node seems to be shutting down, canceling fetch task" + ); return; } Err(TaskError::PeerError) => { @@ -208,30 +225,30 @@ impl RunningTask { continue; } }; + let chunk = match resp { + AvailabilityFetchingResponse::Chunk(chunk) => chunk, + }; - // Data valid? - if !self.validate_response(&resp) { + // Data genuine? + if !self.validate_chunk(peer_id, &chunk) { bad_validators.push(index); continue; } - // Ok, let's store it and be happy. - store_response(resp); + // Ok, let's store it and be happy: + self.store_chunk(chunk).await; break; } - conclude(bad_validators); + self.conclude(bad_validators); } /// Do request and return response, if successful. - /// - /// Will also report peer if not successful. async fn do_request( &self, - validator: ValidatorIndex, - ) -> std::result::Result { - let peer = self.get_peer_id(index)?; - let (full_request, response_recv) = - Requests::AvailabilityFetching(OutgoingRequest::new(peer, self.request)); + peer: PeerId, + ) -> std::result::Result { + let (full_request, response_recv) = OutgoingRequest::new(peer, self.request); + let requests = Requests::AvailabilityFetching(Vec::from(full_request)); self.sender .send(FromFetchTask::Message( @@ -240,20 +257,75 @@ impl RunningTask { .await .map_err(|| TaskError::ShuttingDown)?; - // TODO: Also handle receiver cancel. 
match response_recv.await { - Ok(resp) => Some(resp), - Err(RequestError::InvalidResponse(err)) => {} - Err(RequestError::NetworkError(err)) => {} - Err(RequestError::Canceled(err)) => {} + Ok(resp) => Ok(resp), + Err(RequestError::InvalidResponse(err)) => { + tracing::warn!( + target: LOG_TARGET, + "Peer sent us invalid erasure chunk data" + ); + Err(TaskError::PeerError) + } + Err(RequestError::NetworkError(err)) => { + tracing::warn!( + target: LOG_TARGET, + "Some network error occurred when fetching erasure chunk" + ); + Err(TaskError::PeerError) + } + Err(RequestError::Canceled(err)) => { + tracing::warn!(target: LOG_TARGET, "Erasure chunk request got canceled"); + Err(TaskError::PeerError) + } + } + } + + fn validate_chunk(&self, peer_id: &PeerId, chunk: &ErasureChunk) -> bool { + let anticipated_hash = + match branch_hash(&self.erasure_root, &chunk.proof, chunk.index as usize) { + Ok(hash) => hash, + Err(e) => { + tracing::trace!( + target: LOG_TARGET, + candidate_hash = ?self.candidate_hash, + origin = ?peer_id, + error = ?e, + "Failed to calculate chunk merkle proof", + ); + return false; + } + }; + let erasure_chunk_hash = BlakeTwo256::hash(&chunk.chunk); + if anticipated_hash != erasure_chunk_hash { + tracing::warn!(target: LOG_TARGET, origin = ?peer_id, "Received chunk does not match merkle tree"); + return false; } - Err(PeerError) + true } fn get_peer_id(index: ValidatorIndex) -> Result { panic!("TO BE IMPLEMENTED"); } + /// Store given chunk and log any error. + async fn store_chunk(&self, chunk: ErasureChunk) { + let (tx, rx) = oneshot::channel(); + self.sender + .send(FromFetchTask::Message(AllMessages::AvailabilityStore( + AvailabilityStoreMessage::StoreChunk { + candidate_hash: self.candidate_hash, + relay_parent: self.relay_parent, + chunk, + tx, + }, + ))) + .await; + + if let Err(oneshot::Canceled) = rx.await { + tracing::error!(target: LOG_TARGET, "Storing erasure chunk failed"); + } + } + /// Tell subsystem we are done. async fn conclude(&self, bad_validators: Vec) { let payload = if bad_validators.is_empty() { @@ -267,8 +339,8 @@ impl RunningTask { }; if let Err(err) = self.sender.send(FromFetchTask::Concluded(payload)).await { tracing::warn!( - LOG_TARGET, - err: ?err, + target: LOG_TARGET, + err= ?err, "Sending concluded message for task failed" ); } From b9aa906dfe8687e0963f77769b66dc55eeeeafb4 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 15 Feb 2021 22:35:57 +0100 Subject: [PATCH 12/60] Finish FetchTask so far. 
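For orientation, this is roughly how `ProtocolState` is expected to drive a
`FetchTask` over its lifetime (illustrative fragment only; `ctx`, `leaf`,
`other_leaf`, `core`, `session_info`, `sender` and `deactivated` stand in for
values provided by the surrounding subsystem code):

    // Start fetching our chunk for a candidate occupying a core:
    let mut task = FetchTask::start(ctx, leaf, core, session_info, sender.clone()).await?;
    // Another active leaf still references the same candidate:
    task.add_leaf(other_leaf);
    // Some leaves got deactivated; the task cancels itself once none are left:
    task.remove_leaves(deactivated);
    if !task.is_live() {
        // Dropping the task drops its kill handle, which stops the background fetch.
    }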
--- .../src/fetch_task.rs | 64 +++++++++++-------- .../src/session_cache.rs | 9 ++- .../availability-distribution/src/state.rs | 51 ++++++++------- 3 files changed, 72 insertions(+), 52 deletions(-) diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index 22558fea2e5d..f482593fb9ea 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -20,6 +20,8 @@ use std::rc::Rc; use futures::channel::mpsc; use futures::channel::oneshot; +use futures::future::select; +use futures::SinkExt; use sc_network::PeerId; @@ -39,7 +41,7 @@ use polkadot_subsystem::messages::{ use polkadot_subsystem::{ errors::{ChainApiError, RuntimeApiError}, jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, - Subsystem, SubsystemContext, SubsystemError, + Subsystem, SubsystemContext, SubsystemError, SubsystemResult }; use super::{session_cache::SessionInfo, LOG_TARGET}; @@ -117,9 +119,6 @@ struct RunningTask { /// Sender for communicating with other subsystems and reporting results. sender: mpsc::Sender, - - /// Receive `Canceled` errors here. - receiver: oneshot::Receiver<()>, } impl FetchTask { @@ -130,15 +129,15 @@ impl FetchTask { core: OccupiedCore, session_info: Rc, sender: mpsc::Sender, - ) -> Self + ) -> SubsystemResult where Context: SubsystemContext, { - let (handle, receiver) = oneshot::channel(); + let (handle, kill) = oneshot::channel(); let running = RunningTask { session_index: session_info.session_index, group_index: core.group_responsible, - group: session_info.validator_groups.get(core.group_responsible).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), + group: session_info.validator_groups.get(core.group_responsible.into() as usize).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), request: AvailabilityFetchingRequest { candidate_hash: core.candidate_hash, index: session_info.our_index, @@ -147,15 +146,14 @@ impl FetchTask { relay_parent: core.candidate_descriptor.relay_parent, candidate_hash: core.candidate_hash, sender, - receiver, }; - ctx.spawn("chunk-fetcher", Pin::new(Box::new(running.run()))) + ctx.spawn("chunk-fetcher", Pin::new(Box::new(running.run(kill)))) .await?; - FetchTask { - live_in: HashSet::from(leaf), + Ok(FetchTask { + live_in: vec![leaf].into_iter().collect(), state: FetchedState::Started(handle), session: session_info, - } + }) } /// Add the given leaf to the relay parents which are making this task relevant. @@ -166,9 +164,8 @@ impl FetchTask { /// Remove leaves and cancel the task, if it was the last one and the task has still been /// fetching. pub fn remove_leaves(&mut self, leaves: HashSet) { - self.live_in.difference(leaves); + self.live_in.difference(&leaves); if self.live_in.is_empty() { - // TODO: Make sure, to actually cancel the task. self.state = FetchedState::Canceled } } @@ -183,9 +180,10 @@ impl FetchTask { } } - /// Retrieve the relay parent providing the context for this candidate. - pub fn get_relay_parent(&self) -> Hash { - self.descriptor.relay_parent + /// Whether or not there are still relay parents around with this candidate pending + /// availability. 
+ pub fn is_live(&self) -> bool { + !self.live_in.is_empty() } } @@ -202,15 +200,31 @@ enum TaskError { type Result = std::result::Result; impl RunningTask { + async fn run(self, kill: oneshot::Receiver<()>) { + // Wait for completion/or cancel. + let _ = select(self.run_inner(), kill); + } + /// Fetch and store chunk. /// /// Try validators in backing group in order. - async fn run(self) { + async fn run_inner(self) { let bad_validators = Vec::new(); // Try validators in order: for index in self.group { // Send request: - let peer_id = self.get_peer_id(index)?; + let peer_id = match self.get_peer_id(index).await { + Ok(peer_id) => peer_id, + Err(err) => { + tracing::warn!( + target: LOG_TARGET, + validator_index = ?index, + "Discoverying peer id for validator failed" + ); + bad_validators.push(index); + continue + } + }; let resp = match self.do_request(peer_id).await { Ok(resp) => resp, Err(TaskError::ShuttingDown) => { @@ -248,14 +262,14 @@ impl RunningTask { peer: PeerId, ) -> std::result::Result { let (full_request, response_recv) = OutgoingRequest::new(peer, self.request); - let requests = Requests::AvailabilityFetching(Vec::from(full_request)); + let requests = Requests::AvailabilityFetching(full_request); self.sender .send(FromFetchTask::Message( - AllMessages::NetworkBridgeMessage::SendRequests(Vec::from(full_request)), + AllMessages::NetworkBridge(NetworkBridgeMessage::SendRequests(vec![requests])), )) .await - .map_err(|| TaskError::ShuttingDown)?; + .map_err(|_| TaskError::ShuttingDown)?; match response_recv.await { Ok(resp) => Ok(resp), @@ -303,12 +317,12 @@ impl RunningTask { true } - fn get_peer_id(index: ValidatorIndex) -> Result { + fn get_peer_id(&self, index: ValidatorIndex) -> Result { panic!("TO BE IMPLEMENTED"); } /// Store given chunk and log any error. - async fn store_chunk(&self, chunk: ErasureChunk) { + async fn store_chunk(&mut self, chunk: ErasureChunk) { let (tx, rx) = oneshot::channel(); self.sender .send(FromFetchTask::Message(AllMessages::AvailabilityStore( @@ -327,7 +341,7 @@ impl RunningTask { } /// Tell subsystem we are done. - async fn conclude(&self, bad_validators: Vec) { + async fn conclude(&mut self, bad_validators: Vec) { let payload = if bad_validators.is_empty() { None } else { diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 043022f31fcc..a0957c6c3916 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -80,6 +80,13 @@ pub struct SessionInfo { } impl SessionCache { + pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { + SessionCache { + by_relay_parent: HashMap::new(), + by_session_index: HashMap::new(), + keystore, + } + } /// Retrieve session info for the given relay parent. /// /// This function will query the cache first and will only query the runtime on cache miss. 
@@ -119,8 +126,8 @@ impl SessionCache { let info = Rc::new(SessionInfo { validator_groups, - validators, our_index, + session_index, }); let downgraded = Rc::downgrade(&info); self.by_relay_parent.insert(parent, downgraded.clone()); diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index 2334902af070..3668ae6310c4 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -65,7 +65,6 @@ use jaeger::JaegerSpan; use itertools::{Either, Itertools}; -use super::{fetch_task::FetchTask, session_cache::SessionCache, Result, LOG_TARGET}; use polkadot_node_subsystem_util::request_availability_cores_ctx; use polkadot_primitives::v1::{ BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, @@ -79,6 +78,8 @@ use polkadot_subsystem::{ SubsystemContext, SubsystemError, }; +use super::{fetch_task::FetchTask, session_cache::SessionCache, Result, LOG_TARGET, error::recv_runtime}; + /// A running instance of this subsystem. pub struct ProtocolState { /// Candidates we need to fetch our chunk for. @@ -91,10 +92,16 @@ pub struct ProtocolState { } impl ProtocolState { + pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { + ProtocolState { + fetches: HashMap::new(), + session_cache: SessionCache::new(keystore), + } + } /// Update heads that need availability distribution. /// /// For all active heads we will be fetching our chunk for availabilty distribution. - pub(crate) fn update_fetching_heads( + pub(crate) async fn update_fetching_heads( &mut self, ctx: &mut Context, update: ActiveLeavesUpdate, @@ -108,8 +115,9 @@ impl ProtocolState { } = update; // Order important! We need to handle activated, prior to deactivated, otherwise we might // cancel still needed jobs. - self.start_requesting_chunks(ctx, activated).await?; - self.stop_requesting_chunks(ctx, deactivated)?; + self.start_requesting_chunks(ctx, activated.into_iter()).await?; + self.stop_requesting_chunks(deactivated.into_iter()); + Ok(()) } /// Start requesting chunks for newly imported heads. @@ -119,7 +127,7 @@ impl ProtocolState { new_heads: impl Iterator)>, ) -> Result<()> where - Context: SubsystemContext + Sync + Send, + Context: SubsystemContext, { for (leaf, _) in new_heads { let cores = query_occupied_cores(ctx, leaf).await?; @@ -134,22 +142,13 @@ impl ProtocolState { /// referenced by any candidate anymore). fn stop_requesting_chunks( &mut self, - obsolete_leaves: impl Iterator)>, - ) -> Result> { + obsolete_leaves: impl Iterator, + ) { let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().map(|h| h.0).collect(); - let new_fetches = self - .fetches - .into_iter() - .filter_map(|(c_hash, task)| { - task.remove_leaves(HashSet::from(obsolete_leaves)); - if task.is_finished() { - Some(task.get_relay_parent()) - } else { - None - } - }) - .collect(); - self.fetches = new_fetches; + self.fetches.retain(|&c_hash, task| { + task.remove_leaves(obsolete_leaves); + task.is_live() + }) } /// Add candidates corresponding for a particular relay parent. @@ -159,25 +158,24 @@ impl ProtocolState { /// Note: The passed in `leaf` is not the same as CandidateDescriptor::relay_parent in the /// given cores. The latter is the relay_parent this candidate considers its parent, while the /// passed in leaf might be some later block where the candidate is still pending availability. 
- fn add_cores( + async fn add_cores( &mut self, ctx: &mut Context, leaf: Hash, cores: impl IntoIterator, - ) where + ) -> Result<()> + where Context: SubsystemContext, { for core in cores { match self.fetches.entry(core.candidate_hash) { Entry::Occupied(e) => // Just book keeping - we are already requesting that chunk: - { - e.get_mut().add_leaf(leaf) - } + e.get_mut().add_leaf(leaf), Entry::Vacant(e) => { let session_info = self .session_cache - .fetch_session_info(ctx, core.candidate_descriptor.relay_parent)?; + .fetch_session_info(ctx, core.candidate_descriptor.relay_parent).await?; if let Some(session_info) = session_info { e.insert(FetchTask::start(ctx, leaf, core, session_info)) } @@ -185,6 +183,7 @@ impl ProtocolState { } } } + Ok(()) } } From a65562f5b067ee9e91e6e01dcc14f259b09f3f23 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 16 Feb 2021 16:01:21 +0100 Subject: [PATCH 13/60] Directly use AuthorityDiscoveryId in protocol and cache. --- .../availability-distribution/src/error.rs | 5 + .../src/session_cache.rs | 94 +++++++++++++------ .../protocol/src/request_response/request.rs | 6 +- 3 files changed, 76 insertions(+), 29 deletions(-) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index 0e29294b2c0c..22f363f6584f 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -20,6 +20,7 @@ use thiserror::Error; use futures::channel::oneshot; use polkadot_node_subsystem_util::Error as UtilError; +use polkadot_primitives::v1::SessionIndex; use polkadot_subsystem::{ errors::{ChainApiError, RuntimeApiError}, SubsystemError, @@ -50,6 +51,10 @@ pub enum Error { /// Some request to the runtime failed. #[error("Runtime request failed")] RuntimeRequest(#[source] RuntimeApiError), + + /// We tried fetching a session which was not available. + #[error("No such session")] + NoSuchSession(SessionIndex), } pub type Result = std::result::Result; diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index a0957c6c3916..f31984f62479 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -24,11 +24,12 @@ use sp_core::crypto::Public; use sp_keystore::{CryptoStore, SyncCryptoStorePtr}; use polkadot_node_subsystem_util::{ - request_session_index_for_child_ctx, request_validator_groups_ctx, request_validators_ctx, + request_session_index_for_child_ctx, request_session_info_ctx, }; +use polkadot_primitives::v1::SessionInfo as GlobalSessionInfo; use polkadot_primitives::v1::{ BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, - SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, + SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, AuthorityDiscoveryId }; use polkadot_subsystem::{ jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, @@ -73,10 +74,14 @@ pub struct SessionInfo { /// Each group's order is randomized. This way we achieve load balancing when requesting /// chunks, as the validators in a group will be tried in that randomized order. Each node /// should arrive at a different order, therefore we distribute the load. 
- pub validator_groups: Vec>, + pub validator_groups: Vec>, /// Information about ourself: pub our_index: ValidatorIndex, + //// Remember to which group we blong, so we won't start fetching chunks for candidates we + //// backed our selves. + // TODO: Implement this: + // pub our_group: GroupIndex, } impl SessionCache { @@ -114,24 +119,14 @@ impl SessionCache { // session info matches way longer than necessary (for an entire session), but the overhead // should be low enough to not matter. self.bury_dead(); - if let Some((our_index, validators)) = self.query_validator_info(ctx, parent).await? { - let (mut validator_groups, _) = - recv_runtime(request_validator_groups_ctx(parent, ctx).await).await?; - // Shuffle validators in groups: - let mut rng = thread_rng(); - for g in validator_groups.iter_mut() { - g.shuffle(&mut rng) - } - - let info = Rc::new(SessionInfo { - validator_groups, - our_index, - session_index, - }); - let downgraded = Rc::downgrade(&info); - self.by_relay_parent.insert(parent, downgraded.clone()); - self.by_session_index.insert(session_index, downgraded); + if let Some(info) = self + .query_info_from_runtime(ctx, parent, session_index) + .await? + { + self.by_relay_parent.insert(parent, Rc::downgrade(&info)); + self.by_session_index + .insert(session_index, Rc::downgrade(&info)); return Ok(Some(info)); } Ok(None) @@ -152,26 +147,71 @@ impl SessionCache { upgrade_report_dead(weak_ref) } - /// Get our validator id and the validators in the current session. + /// Query needed information from runtime. /// - /// Returns: Ok(None) if we are not a validator. - async fn query_validator_info( + /// We need to pass in the relay parent for our call to `request_session_info_ctx`. We should + /// actually don't need that, I suppose it is used for internal caching based on relay parents, + /// which we don't use here. It should not do any harm though. + async fn query_info_from_runtime( &self, ctx: &mut Context, parent: Hash, - ) -> Result)>> + session_index: SessionIndex, + ) -> Result>> where Context: SubsystemContext, { - let validators = recv_runtime(request_validators_ctx(parent, ctx).await).await?; + let GlobalSessionInfo { + validators, + discovery_keys, + mut validator_groups, + .. + } = recv_runtime(request_session_info_ctx(parent, session_index, ctx).await) + .await? + .ok_or(Error::NoSuchSession(session_index))?; + + if let Some(our_index) = self.get_our_index(validators).await { + // Shuffle validators in groups: + let mut rng = thread_rng(); + for g in validator_groups.iter_mut() { + g.shuffle(&mut rng) + } + // Look up `AuthorityDiscoveryId`s right away: + let validator_groups: Vec> = validator_groups + .into_iter() + .map(|group| { + group + .into_iter() + .map(|index| { + discovery_keys.get(Into::into(index) as usize) + .expect("There should be a discovery key for each validator of each validator group. qed.").clone() + }) + .collect() + }) + .collect(); + + let info = Rc::new(SessionInfo { + validator_groups, + our_index, + session_index, + }); + return Ok(Some(info)); + } + return Ok(None); + } + + /// Get our validator id and the validators in the current session. + /// + /// Returns: Ok(None) if we are not a validator. 
+ async fn get_our_index(&self, validators: Vec) -> Option { for (i, v) in validators.iter().enumerate() { if CryptoStore::has_keys(&*self.keystore, &[(v.to_raw_vec(), ValidatorId::ID)]) .await { - return Ok(Some((ValidatorIndex(i as u32), validators))); + return Some(ValidatorIndex(i as u32)); } } - Ok(None) + None } /// Get rid of the dead bodies from time to time. diff --git a/node/network/protocol/src/request_response/request.rs b/node/network/protocol/src/request_response/request.rs index a37ff8d2eaa1..2f086510177d 100644 --- a/node/network/protocol/src/request_response/request.rs +++ b/node/network/protocol/src/request_response/request.rs @@ -22,6 +22,8 @@ use sc_network as network; use sc_network::config as netconfig; use sc_network::PeerId; +use polkadot_primitives::v1::AuthorityDiscoveryId; + use super::{v1, Protocol}; /// Common properties of any `Request`. @@ -69,7 +71,7 @@ impl Requests { #[derive(Debug)] pub struct OutgoingRequest { /// Intendent recipient of this request. - pub peer: PeerId, + pub peer: AuthorityDiscoveryId, /// The actual request to send over the wire. pub payload: Req, /// Sender which is used by networking to get us back a response. @@ -98,7 +100,7 @@ where /// It will contain a sender that is used by the networking for sending back responses. The /// connected receiver is returned as the second element in the returned tuple. pub fn new( - peer: PeerId, + peer: AuthorityDiscoveryId, payload: Req, ) -> ( Self, From 4a4356108fd4a887a51ababe19be4bb0c729b822 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 16 Feb 2021 20:44:51 +0100 Subject: [PATCH 14/60] Resolve `AuthorityDiscoveryId` on sending requests. --- node/network/bridge/src/lib.rs | 11 +++++-- node/network/bridge/src/network.rs | 50 +++++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/node/network/bridge/src/lib.rs b/node/network/bridge/src/lib.rs index 030a3c5b8d11..67565e685fba 100644 --- a/node/network/bridge/src/lib.rs +++ b/node/network/bridge/src/lib.rs @@ -118,7 +118,7 @@ impl NetworkBridge { impl Subsystem for NetworkBridge where - Net: Network + validator_discovery::Network, + Net: Network + validator_discovery::Network + Sync, AD: validator_discovery::AuthorityDiscovery, Context: SubsystemContext, { @@ -238,7 +238,10 @@ where Action::SendRequests(reqs) => { for req in reqs { - bridge.network_service.start_request(req); + bridge + .network_service + .start_request(&mut bridge.authority_discovery_service, req) + .await; } }, @@ -615,6 +618,7 @@ mod tests { use polkadot_node_network_protocol::{ObservedRole, request_response::request::Requests}; use crate::network::{Network, NetworkAction}; + use crate::validator_discovery::AuthorityDiscovery; // The subsystem's view of the network - only supports a single call to `event_stream`. 
struct TestNetwork { @@ -654,6 +658,7 @@ mod tests { ) } + #[async_trait] impl Network for TestNetwork { fn event_stream(&mut self) -> BoxStream<'static, NetworkEvent> { self.net_events.lock() @@ -668,7 +673,7 @@ mod tests { Box::pin((&mut self.action_tx).sink_map_err(Into::into)) } - fn start_request(&self, _: Requests) { + async fn start_request(&self, _: &mut AD, _: Requests) { } } diff --git a/node/network/bridge/src/network.rs b/node/network/bridge/src/network.rs index 77c58e120616..ad8f85d81d55 100644 --- a/node/network/bridge/src/network.rs +++ b/node/network/bridge/src/network.rs @@ -17,6 +17,7 @@ use std::pin::Pin; use std::sync::Arc; +use async_trait::async_trait; use futures::future::BoxFuture; use futures::prelude::*; use futures::stream::BoxStream; @@ -24,7 +25,7 @@ use futures::stream::BoxStream; use parity_scale_codec::Encode; use sc_network::Event as NetworkEvent; -use sc_network::{NetworkService, IfDisconnected}; +use sc_network::{IfDisconnected, NetworkService, OutboundFailure, RequestFailure}; use polkadot_node_network_protocol::{ peer_set::PeerSet, @@ -34,6 +35,8 @@ use polkadot_node_network_protocol::{ use polkadot_primitives::v1::{Block, Hash}; use polkadot_subsystem::{SubsystemError, SubsystemResult}; +use crate::validator_discovery::{peer_id_from_multiaddr, AuthorityDiscovery}; + use super::LOG_TARGET; /// Send a message to the network. @@ -92,6 +95,7 @@ pub enum NetworkAction { } /// An abstraction over networking for the purposes of this subsystem. +#[async_trait] pub trait Network: Send + 'static { /// Get a stream of all events occurring on the network. This may include events unrelated /// to the Polkadot protocol - the user of this function should filter only for events related @@ -105,7 +109,11 @@ pub trait Network: Send + 'static { ) -> Pin + Send + 'a>>; /// Send a request to a remote peer. - fn start_request(&self, req: Requests); + async fn start_request( + &self, + authority_discovery: &mut AD, + req: Requests, + ); /// Report a given peer as either beneficial (+) or costly (-) according to the given scalar. fn report_peer( @@ -137,6 +145,7 @@ pub trait Network: Send + 'static { } } +#[async_trait] impl Network for Arc> { fn event_stream(&mut self) -> BoxStream<'static, NetworkEvent> { NetworkService::event_stream(self, "polkadot-network-bridge").boxed() @@ -189,7 +198,11 @@ impl Network for Arc> { Box::pin(ActionSink(&**self)) } - fn start_request(&self, req: Requests) { + async fn start_request( + &self, + authority_discovery: &mut AD, + req: Requests, + ) { let ( protocol, OutgoingRequest { @@ -199,8 +212,35 @@ impl Network for Arc> { }, ) = req.encode_request(); - NetworkService::start_request(&*self, - peer, + let peer_id = authority_discovery + .get_addresses_by_authority_id(peer) + .await + .and_then(|addrs| { + addrs + .into_iter() + .find_map(|addr| peer_id_from_multiaddr(&addr)) + }); + + let peer_id = match peer_id { + None => { + tracing::debug!(target: LOG_TARGET, "Discovering authority failed"); + match pending_response + .send(Err(RequestFailure::Network(OutboundFailure::DialFailure))) + { + Err(_) => tracing::debug!( + target: LOG_TARGET, + "Sending failed request response failed." 
+ ), + Ok(_) => {} + } + return; + } + Some(peer_id) => peer_id, + }; + + NetworkService::start_request( + &*self, + peer_id, protocol.into_protocol_name(), payload, pending_response, From 6543b303ab92748a18aa17b56e298d3fa6ebb1fc Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Wed, 17 Feb 2021 15:48:27 +0100 Subject: [PATCH 15/60] Rework fetch_task - also make it impossible to check the wrong chunk index. - Export needed function in validator_discovery. --- .../src/fetch_task.rs | 90 +++++++++---------- .../network/bridge/src/validator_discovery.rs | 2 +- .../protocol/src/request_response/v1.rs | 41 +++++++-- 3 files changed, 77 insertions(+), 56 deletions(-) diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index f482593fb9ea..da8c708ea421 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -23,16 +23,15 @@ use futures::channel::oneshot; use futures::future::select; use futures::SinkExt; -use sc_network::PeerId; - use polkadot_erasure_coding::branch_hash; use polkadot_node_network_protocol::request_response::{ request::{OutgoingRequest, RequestError, Requests}, v1::{AvailabilityFetchingRequest, AvailabilityFetchingResponse}, }; use polkadot_primitives::v1::{ - BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, GroupIndex, Hash, - HashT, OccupiedCore, SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, + AuthorityDiscoveryId, BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, + ErasureChunk, GroupIndex, Hash, HashT, OccupiedCore, SessionIndex, ValidatorId, + ValidatorIndex, PARACHAIN_KEY_TYPE_ID, }; use polkadot_subsystem::messages::{ AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage, @@ -41,7 +40,7 @@ use polkadot_subsystem::messages::{ use polkadot_subsystem::{ errors::{ChainApiError, RuntimeApiError}, jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, - Subsystem, SubsystemContext, SubsystemError, SubsystemResult + Subsystem, SubsystemContext, SubsystemError, SubsystemResult, }; use super::{session_cache::SessionInfo, LOG_TARGET}; @@ -91,7 +90,7 @@ pub struct BadValidators { /// The group the not properly responding validators are. pub group_index: GroupIndex, /// The indeces of the bad validators. - pub bad_validators: Vec, + pub bad_validators: Vec, } /// Information a running task needs. @@ -103,7 +102,9 @@ struct RunningTask { group_index: GroupIndex, /// Validators to request the chunk from. - group: Vec, + /// + /// This vector gets drained during execution of the task (it will be empty afterwards). + group: Vec, /// The request to send. request: AvailabilityFetchingRequest, @@ -114,9 +115,6 @@ struct RunningTask { /// Relay parent of the candidate to fetch. relay_parent: Hash, - /// Hash of the candidate we are fetching our chunk for. - candidate_hash: CandidateHash, - /// Sender for communicating with other subsystems and reporting results. 
sender: mpsc::Sender, } @@ -144,10 +142,9 @@ impl FetchTask { }, erasure_root: core.candidate_descriptor.erasure_root, relay_parent: core.candidate_descriptor.relay_parent, - candidate_hash: core.candidate_hash, sender, }; - ctx.spawn("chunk-fetcher", Pin::new(Box::new(running.run(kill)))) + ctx.spawn("chunk-fetcher", running.run(kill).boxed()) .await?; Ok(FetchTask { live_in: vec![leaf].into_iter().collect(), @@ -202,30 +199,20 @@ type Result = std::result::Result; impl RunningTask { async fn run(self, kill: oneshot::Receiver<()>) { // Wait for completion/or cancel. - let _ = select(self.run_inner(), kill); + let run_it = self.run_inner(); + futures::pin_mut!(run_it); + let _ = select(run_it, kill).await; } /// Fetch and store chunk. /// /// Try validators in backing group in order. - async fn run_inner(self) { - let bad_validators = Vec::new(); + async fn run_inner(mut self) { + let mut bad_validators = Vec::new(); // Try validators in order: - for index in self.group { + while let Some(validator)= self.group.pop() { // Send request: - let peer_id = match self.get_peer_id(index).await { - Ok(peer_id) => peer_id, - Err(err) => { - tracing::warn!( - target: LOG_TARGET, - validator_index = ?index, - "Discoverying peer id for validator failed" - ); - bad_validators.push(index); - continue - } - }; - let resp = match self.do_request(peer_id).await { + let resp = match self.do_request(&validator).await { Ok(resp) => resp, Err(TaskError::ShuttingDown) => { tracing::info!( @@ -235,17 +222,19 @@ impl RunningTask { return; } Err(TaskError::PeerError) => { - bad_validators.push(index); + bad_validators.push(validator); continue; } }; let chunk = match resp { - AvailabilityFetchingResponse::Chunk(chunk) => chunk, + AvailabilityFetchingResponse::Chunk(resp) => { + resp.reconstruct_erasure_chunk(&self.request) + } }; // Data genuine? - if !self.validate_chunk(peer_id, &chunk) { - bad_validators.push(index); + if !self.validate_chunk(&validator, &chunk) { + bad_validators.push(validator); continue; } @@ -258,16 +247,17 @@ impl RunningTask { /// Do request and return response, if successful. 
async fn do_request( - &self, - peer: PeerId, + &mut self, + validator: &AuthorityDiscoveryId, ) -> std::result::Result { - let (full_request, response_recv) = OutgoingRequest::new(peer, self.request); + let (full_request, response_recv) = + OutgoingRequest::new(validator.clone(), self.request); let requests = Requests::AvailabilityFetching(full_request); self.sender - .send(FromFetchTask::Message( - AllMessages::NetworkBridge(NetworkBridgeMessage::SendRequests(vec![requests])), - )) + .send(FromFetchTask::Message(AllMessages::NetworkBridge( + NetworkBridgeMessage::SendRequests(vec![requests]), + ))) .await .map_err(|_| TaskError::ShuttingDown)?; @@ -276,6 +266,7 @@ impl RunningTask { Err(RequestError::InvalidResponse(err)) => { tracing::warn!( target: LOG_TARGET, + origin= ?validator, "Peer sent us invalid erasure chunk data" ); Err(TaskError::PeerError) @@ -283,26 +274,29 @@ impl RunningTask { Err(RequestError::NetworkError(err)) => { tracing::warn!( target: LOG_TARGET, + origin= ?validator, "Some network error occurred when fetching erasure chunk" ); Err(TaskError::PeerError) } Err(RequestError::Canceled(err)) => { - tracing::warn!(target: LOG_TARGET, "Erasure chunk request got canceled"); + tracing::warn!(target: LOG_TARGET, + origin= ?validator, + "Erasure chunk request got canceled"); Err(TaskError::PeerError) } } } - fn validate_chunk(&self, peer_id: &PeerId, chunk: &ErasureChunk) -> bool { + fn validate_chunk(&self, validator: &AuthorityDiscoveryId, chunk: &ErasureChunk) -> bool { let anticipated_hash = match branch_hash(&self.erasure_root, &chunk.proof, chunk.index as usize) { Ok(hash) => hash, Err(e) => { tracing::trace!( target: LOG_TARGET, - candidate_hash = ?self.candidate_hash, - origin = ?peer_id, + candidate_hash = ?self.request.candidate_hash, + origin = ?validator, error = ?e, "Failed to calculate chunk merkle proof", ); @@ -311,23 +305,19 @@ impl RunningTask { }; let erasure_chunk_hash = BlakeTwo256::hash(&chunk.chunk); if anticipated_hash != erasure_chunk_hash { - tracing::warn!(target: LOG_TARGET, origin = ?peer_id, "Received chunk does not match merkle tree"); + tracing::warn!(target: LOG_TARGET, origin = ?validator, "Received chunk does not match merkle tree"); return false; } true } - fn get_peer_id(&self, index: ValidatorIndex) -> Result { - panic!("TO BE IMPLEMENTED"); - } - /// Store given chunk and log any error. async fn store_chunk(&mut self, chunk: ErasureChunk) { let (tx, rx) = oneshot::channel(); self.sender .send(FromFetchTask::Message(AllMessages::AvailabilityStore( AvailabilityStoreMessage::StoreChunk { - candidate_hash: self.candidate_hash, + candidate_hash: self.request.candidate_hash, relay_parent: self.relay_parent, chunk, tx, @@ -341,7 +331,7 @@ impl RunningTask { } /// Tell subsystem we are done. 
- async fn conclude(&mut self, bad_validators: Vec) { + async fn conclude(&mut self, bad_validators: Vec) { let payload = if bad_validators.is_empty() { None } else { diff --git a/node/network/bridge/src/validator_discovery.rs b/node/network/bridge/src/validator_discovery.rs index 926aa3706649..06fb5b65bdb8 100644 --- a/node/network/bridge/src/validator_discovery.rs +++ b/node/network/bridge/src/validator_discovery.rs @@ -126,7 +126,7 @@ fn on_revoke(map: &mut HashMap, id: AuthorityDiscover None } -fn peer_id_from_multiaddr(addr: &Multiaddr) -> Option { +pub(crate) fn peer_id_from_multiaddr(addr: &Multiaddr) -> Option { addr.iter().last().and_then(|protocol| if let Protocol::P2p(multihash) = protocol { PeerId::from_multihash(multihash).ok() } else { diff --git a/node/network/protocol/src/request_response/v1.rs b/node/network/protocol/src/request_response/v1.rs index 04a7865bfc44..53e58f6b48b1 100644 --- a/node/network/protocol/src/request_response/v1.rs +++ b/node/network/protocol/src/request_response/v1.rs @@ -24,18 +24,49 @@ use super::request::IsRequest; use super::Protocol; /// Request an availability chunk. -#[derive(Debug, Clone, Encode, Decode)] +#[derive(Debug, Copy, Clone, Encode, Decode)] pub struct AvailabilityFetchingRequest { - candidate_hash: CandidateHash, - index: ValidatorIndex, + pub candidate_hash: CandidateHash, + pub index: ValidatorIndex, } /// Receive a rqeuested erasure chunk. #[derive(Debug, Clone, Encode, Decode)] pub enum AvailabilityFetchingResponse { - /// The requested chunk. + /// The requested chunk data. #[codec(index = 0)] - Chunk(ErasureChunk), + Chunk(ChunkResponse), +} + +/// Skimmed down variant of `ErasureChunk`. +/// +/// Instead of transmitting a full `ErasureChunk` we transmit `ChunkResponse` in +/// `AvailabilityFetchingResponse`, which omits the chunk's index. The index is already known by +/// the requester and by not transmitting it, we ensure the requester is going to use his index +/// value for validating the response, thus making sure he got what he requested. +#[derive(Debug, Clone, Encode, Decode)] +pub struct ChunkResponse { + /// The erasure-encoded chunk of data belonging to the candidate block. + pub chunk: Vec, + /// Proof for this chunk's branch in the Merkle tree. + pub proof: Vec>, +} + +impl From for ChunkResponse { + fn from(ErasureChunk {chunk, index: _, proof}: ErasureChunk) -> Self { + ChunkResponse { chunk, proof} + } +} + +impl ChunkResponse { + /// Re-build an `ErasureChunk` from response and request. + pub fn reconstruct_erasure_chunk(self, req: &AvailabilityFetchingRequest) -> ErasureChunk { + ErasureChunk { + chunk: self.chunk, + proof: self.proof, + index: req.index.0, + } + } } impl IsRequest for AvailabilityFetchingRequest { From 256e559fbea7e5b42745dc4139cbdd3dbbffd917 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Wed, 17 Feb 2021 15:49:16 +0100 Subject: [PATCH 16/60] From implementation for `ValidatorIndex`. --- primitives/src/v0.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/primitives/src/v0.rs b/primitives/src/v0.rs index 8c6b4f538a54..ecb9f9cf3e53 100644 --- a/primitives/src/v0.rs +++ b/primitives/src/v0.rs @@ -119,6 +119,13 @@ impl MallocSizeOf for ValidatorId { #[derive(Debug, MallocSizeOf)] pub struct ValidatorIndex(pub u32); +// We should really get https://github.com/paritytech/polkadot/issues/2403 going .. +impl From for ValidatorIndex { + fn from(n: u32) -> Self { + ValidatorIndex(n) + } +} + application_crypto::with_pair! { /// A Parachain validator keypair. 
pub type ValidatorPair = validator_app::Pair; From f8d5fef85313da4041b7b4f571d3fbad91f0c622 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Wed, 17 Feb 2021 22:02:12 +0100 Subject: [PATCH 17/60] Fixes and more integration work. --- .../availability-distribution/src/error.rs | 4 ++ .../src/fetch_task.rs | 25 +++----- .../availability-distribution/src/lib.rs | 11 ++-- .../src/session_cache.rs | 17 ++++- .../availability-distribution/src/state.rs | 63 ++++++++++++++----- 5 files changed, 81 insertions(+), 39 deletions(-) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index 22f363f6584f..c507d5835604 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -55,6 +55,10 @@ pub enum Error { /// We tried fetching a session which was not available. #[error("No such session")] NoSuchSession(SessionIndex), + + /// Spawning a running task failed. + #[error("Spawning subsystem task failed")] + SpawnTask(#[source] SubsystemError), } pub type Result = std::result::Result; diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index da8c708ea421..a73edc50a00f 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -21,7 +21,7 @@ use std::rc::Rc; use futures::channel::mpsc; use futures::channel::oneshot; use futures::future::select; -use futures::SinkExt; +use futures::{SinkExt, FutureExt}; use polkadot_erasure_coding::branch_hash; use polkadot_node_network_protocol::request_response::{ @@ -43,7 +43,7 @@ use polkadot_subsystem::{ Subsystem, SubsystemContext, SubsystemError, SubsystemResult, }; -use super::{session_cache::SessionInfo, LOG_TARGET}; +use super::{session_cache::{SessionInfo, BadValidators}, LOG_TARGET, error::{Error, Result}}; pub struct FetchTask { /// For what relay parents this task is relevant. @@ -83,16 +83,6 @@ pub enum FromFetchTask { Concluded(Option), } -/// Report of bad validators. -pub struct BadValidators { - /// The session index that was used. - pub session_index: SessionIndex, - /// The group the not properly responding validators are. - pub group_index: GroupIndex, - /// The indeces of the bad validators. - pub bad_validators: Vec, -} - /// Information a running task needs. struct RunningTask { /// For what session we have been spawned. @@ -127,7 +117,7 @@ impl FetchTask { core: OccupiedCore, session_info: Rc, sender: mpsc::Sender, - ) -> SubsystemResult + ) -> Result where Context: SubsystemContext, { @@ -135,7 +125,7 @@ impl FetchTask { let running = RunningTask { session_index: session_info.session_index, group_index: core.group_responsible, - group: session_info.validator_groups.get(core.group_responsible.into() as usize).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), + group: session_info.validator_groups.get(core.group_responsible.0 as usize).expect("The responsible group of a candidate should be available in the corresponding session. 
qed.").clone(), request: AvailabilityFetchingRequest { candidate_hash: core.candidate_hash, index: session_info.our_index, @@ -145,7 +135,8 @@ impl FetchTask { sender, }; ctx.spawn("chunk-fetcher", running.run(kill).boxed()) - .await?; + .await + .map_err(|e| Error::SpawnTask(e))?; Ok(FetchTask { live_in: vec![leaf].into_iter().collect(), state: FetchedState::Started(handle), @@ -194,8 +185,6 @@ enum TaskError { ShuttingDown, } -type Result = std::result::Result; - impl RunningTask { async fn run(self, kill: oneshot::Receiver<()>) { // Wait for completion/or cancel. @@ -210,7 +199,7 @@ impl RunningTask { async fn run_inner(mut self) { let mut bad_validators = Vec::new(); // Try validators in order: - while let Some(validator)= self.group.pop() { + while let Some(validator) = self.group.pop() { // Send request: let resp = match self.do_request(&validator).await { Ok(resp) => resp, diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index c0792db5d722..a02bcc2a38f3 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -15,6 +15,8 @@ // along with Polkadot. If not, see . +use futures::{FutureExt, TryFutureExt}; + use sp_keystore::SyncCryptoStorePtr; use polkadot_subsystem::{ @@ -60,7 +62,7 @@ where { fn start(self, ctx: Context) -> SpawnedSubsystem { let future = self - .run(ctx, ProtocolState::new()) + .run(ctx) .map_err(|e| SubsystemError::with_origin("availability-distribution", e)) .boxed(); @@ -79,16 +81,17 @@ impl AvailabilityDistributionSubsystem { } /// Start processing work as passed on from the Overseer. - async fn run(self, mut ctx: Context, state: &mut ProtocolState) -> Result<()> + async fn run(self, mut ctx: Context) -> Result<()> where Context: SubsystemContext + Sync + Send, { + let mut state = ProtocolState::new(self.keystore.clone()); loop { let message = ctx.recv().await?; match message { FromOverseer::Signal(OverseerSignal::ActiveLeaves(update)) => { // Update the relay chain heads we are fetching our pieces for: - state.update_fetching_heads(&mut ctx, update)?; + state.update_fetching_heads(&mut ctx, update).await?; } FromOverseer::Signal(OverseerSignal::BlockFinalized(..)) => {} FromOverseer::Signal(OverseerSignal::Conclude) => { @@ -96,7 +99,7 @@ impl AvailabilityDistributionSubsystem { } FromOverseer::Communication { msg: AvailabilityDistributionMessage::AvailabilityFetchingRequest(_), - } => { + } => { // TODO: Implement issue 2306: tracing::warn!( target: LOG_TARGET, diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index f31984f62479..b09d0480afee 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -28,8 +28,9 @@ use polkadot_node_subsystem_util::{ }; use polkadot_primitives::v1::SessionInfo as GlobalSessionInfo; use polkadot_primitives::v1::{ - BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, - SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, AuthorityDiscoveryId + AuthorityDiscoveryId, BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, + ErasureChunk, Hash, HashT, SessionIndex, ValidatorId, ValidatorIndex, + PARACHAIN_KEY_TYPE_ID, GroupIndex, }; use polkadot_subsystem::{ jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, @@ -84,6 +85,16 @@ pub struct SessionInfo { 
// pub our_group: GroupIndex, } +/// Report of bad validators. +pub struct BadValidators { + /// The session index that was used. + pub session_index: SessionIndex, + /// The group the not properly responding validators are. + pub group_index: GroupIndex, + /// The indeces of the bad validators. + pub bad_validators: Vec, +} + impl SessionCache { pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { SessionCache { @@ -183,7 +194,7 @@ impl SessionCache { group .into_iter() .map(|index| { - discovery_keys.get(Into::into(index) as usize) + discovery_keys.get(index.0 as usize) .expect("There should be a discovery key for each validator of each validator group. qed.").clone() }) .collect() diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index 3668ae6310c4..76d90e3581a3 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -60,10 +60,12 @@ use std::collections::{ use std::iter::IntoIterator; use std::sync::Arc; -use futures::channel::oneshot; +use futures::channel::{mpsc, oneshot}; +use futures::StreamExt; +use itertools::{Either, Itertools}; use jaeger::JaegerSpan; -use itertools::{Either, Itertools}; +use sp_keystore::SyncCryptoStorePtr; use polkadot_node_subsystem_util::request_availability_cores_ctx; use polkadot_primitives::v1::{ @@ -78,7 +80,12 @@ use polkadot_subsystem::{ SubsystemContext, SubsystemError, }; -use super::{fetch_task::FetchTask, session_cache::SessionCache, Result, LOG_TARGET, error::recv_runtime}; +use super::{ + error::recv_runtime, + fetch_task::{FetchTask, FromFetchTask}, + session_cache::SessionCache, + Result, LOG_TARGET, +}; /// A running instance of this subsystem. pub struct ProtocolState { @@ -89,13 +96,23 @@ pub struct ProtocolState { /// /// This is usually the current one and at session boundaries also the last one. session_cache: SessionCache, + + /// Sender to be cloned for `FetchTask`s. + tx: mpsc::Sender, + + /// Receive messages from `FetchTask`. + rx: mpsc::Receiver, } impl ProtocolState { pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { + // All we do is forwarding messages, no need to make this big. + let (tx, rx) = mpsc::channel(1); ProtocolState { fetches: HashMap::new(), session_cache: SessionCache::new(keystore), + tx, + rx, } } /// Update heads that need availability distribution. @@ -115,11 +132,26 @@ impl ProtocolState { } = update; // Order important! We need to handle activated, prior to deactivated, otherwise we might // cancel still needed jobs. - self.start_requesting_chunks(ctx, activated.into_iter()).await?; + self.start_requesting_chunks(ctx, activated.into_iter()) + .await?; self.stop_requesting_chunks(deactivated.into_iter()); Ok(()) } + pub(crate) async fn advance(&mut self, ctx: &mut Context) -> Result<()> + where + Context: SubsystemContext, + { + match self.rx.next().await { + Some(FromFetchTask::Message(m)) => ctx.send_message(m).await, + Some(FromFetchTask::Concluded(Some(bad_boys))) => { + self.session_cache.report_bad(bad_boys)? + } + Some(FromFetchTask::Concluded(None)) => {} + } + Ok(()) + } + /// Start requesting chunks for newly imported heads. async fn start_requesting_chunks( &mut self, @@ -140,11 +172,8 @@ impl ProtocolState { /// /// Returns relay_parents which became irrelevant for availability fetching (are not /// referenced by any candidate anymore). 
- fn stop_requesting_chunks( - &mut self, - obsolete_leaves: impl Iterator, - ) { - let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().map(|h| h.0).collect(); + fn stop_requesting_chunks(&mut self, obsolete_leaves: impl Iterator) { + let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().collect(); self.fetches.retain(|&c_hash, task| { task.remove_leaves(obsolete_leaves); task.is_live() @@ -164,20 +193,26 @@ impl ProtocolState { leaf: Hash, cores: impl IntoIterator, ) -> Result<()> - where + where Context: SubsystemContext, { for core in cores { match self.fetches.entry(core.candidate_hash) { - Entry::Occupied(e) => + Entry::Occupied(mut e) => // Just book keeping - we are already requesting that chunk: - e.get_mut().add_leaf(leaf), + { + e.get_mut().add_leaf(leaf) + } Entry::Vacant(e) => { let session_info = self .session_cache - .fetch_session_info(ctx, core.candidate_descriptor.relay_parent).await?; + .fetch_session_info(ctx, core.candidate_descriptor.relay_parent) + .await?; if let Some(session_info) = session_info { - e.insert(FetchTask::start(ctx, leaf, core, session_info)) + e.insert( + FetchTask::start(ctx, leaf, core, session_info, self.tx.clone()) + .await?, + ); } // Not a validator, nothing to do. } From 5e77fb4ce0456b74a5d0657d742c140a42ae8dd3 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 18 Feb 2021 10:17:42 +0100 Subject: [PATCH 18/60] Make session cache proper lru cache. --- .../src/session_cache.rs | 98 ++++++++++++------- 1 file changed, 63 insertions(+), 35 deletions(-) diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index b09d0480afee..2ebc353f4fae 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -14,9 +14,10 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::rc::{Rc, Weak}; +use lru::LruCache; use rand::{seq::SliceRandom, thread_rng}; use sp_application_crypto::AppKey; @@ -29,8 +30,8 @@ use polkadot_node_subsystem_util::{ use polkadot_primitives::v1::SessionInfo as GlobalSessionInfo; use polkadot_primitives::v1::{ AuthorityDiscoveryId, BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, - ErasureChunk, Hash, HashT, SessionIndex, ValidatorId, ValidatorIndex, - PARACHAIN_KEY_TYPE_ID, GroupIndex, + ErasureChunk, GroupIndex, Hash, HashT, SessionIndex, ValidatorId, ValidatorIndex, + PARACHAIN_KEY_TYPE_ID, }; use polkadot_subsystem::{ jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, @@ -47,20 +48,18 @@ use super::{ /// It should be ensured that a cached session stays live in the cache as long as we might need it. /// A warning will be logged, if an already dead entry gets fetched. pub struct SessionCache { - /// Maintain caches for session information for currently relay parents of interest. + /// Get the session index for a given relay parent. /// - /// Fast path - if we have an entry here, no query to the runtime is necessary at all. - by_relay_parent: HashMap>, + /// We query this up to a 100 times per block, so caching it here without roundtrips over the + /// overseer seems sensible. + session_index_cache: LruCache, /// Look up cached sessions by SessionIndex. 
/// - /// Slower path - we still have to look up the `SessionIndex` in the runtime, but still might have - /// the session ready already. - /// /// Note: Performance of fetching is really secondary here, but we need to ensure we are going /// to get any existing cache entry, before fetching new information, as we should not mess up - /// the order of validators. - by_session_index: HashMap>, + /// the order of validators. (We want live TCP connections wherever possible.) + session_info_cache: LruCache, /// Key store for determining whether we are a validator and what `ValidatorIndex` we have. keystore: SyncCryptoStorePtr, @@ -79,6 +78,7 @@ pub struct SessionInfo { /// Information about ourself: pub our_index: ValidatorIndex, + //// Remember to which group we blong, so we won't start fetching chunks for candidates we //// backed our selves. // TODO: Implement this: @@ -98,8 +98,10 @@ pub struct BadValidators { impl SessionCache { pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { SessionCache { - by_relay_parent: HashMap::new(), - by_session_index: HashMap::new(), + // 5 relatively conservative, 1 to 2 should suffice: + session_index_cache: LruCache::new(5), + // We need to cache the current and the last session the most: + session_info_cache: LruCache::new(2), keystore, } } @@ -112,37 +114,64 @@ impl SessionCache { &mut self, ctx: &mut Context, parent: Hash, - ) -> Result>> + ) -> Result> where Context: SubsystemContext, { - if let Some(info) = self.get_by_relay_parent(parent) { - return Ok(Some(info)); - } - let session_index = - recv_runtime(request_session_index_for_child_ctx(parent, ctx).await).await?; - if let Some(info) = self.get_by_session_index(session_index) { - self.by_relay_parent.insert(parent, Rc::downgrade(&info)); - return Ok(Some(info)); - } + let session_index = match self.session_index_cache.get(parent) { + Some(index) => index, + None => { + let index = + recv_runtime(request_session_index_for_child_ctx(parent, ctx).await) + .await?; + self.session_index_cache.put(parent, index); + index + } + }; - // About to fetch new stuff, time to get rid of dead bodies: We keep relay_parent to - // session info matches way longer than necessary (for an entire session), but the overhead - // should be low enough to not matter. - self.bury_dead(); + if let Some(info) = self.session_info_cache.get(session_index) { + return Ok(Some(info.clone())); + } if let Some(info) = self .query_info_from_runtime(ctx, parent, session_index) .await? { - self.by_relay_parent.insert(parent, Rc::downgrade(&info)); - self.by_session_index - .insert(session_index, Rc::downgrade(&info)); + self.session_info_cache.put(session_index, info.clone()); return Ok(Some(info)); } Ok(None) } + pub async with_session_info( + &mut self, + ctx: &mut Context, + parent: Hash, + with_info: F, + ) -> R + where + Context: SubsystemContext, + F: Fn(info: &SessionInfo) -> R + { + } + + /// Make sure we try unresponsive or misbehaving validators last. 
+ pub fn report_bad(&mut self, report: BadValidators) -> Result<()> { + let session = self + .session_info_cache + .get_mut(&report.session_index) + .ok_or(Error::ReportBadValidators("Session is not cached."))?; + let group = session + .validator_groups + .get_mut(report.group_index.0 as usize) + .ok_or(Error::ReportBadValidators("Validator group not found"))?; + let bad_set = report.bad_validators.iter().collect::>(); + // Put the bad boys last: + group.retain(|v| !bad_set.contains(v)); + group.append(report.bad_validators); + Ok(()) + } + /// Get session info for a particular relay parent. /// /// Returns: None, if no entry for that relay parent exists in the cache (or it was dead @@ -154,8 +183,7 @@ impl SessionCache { /// Get session info for a given `SessionIndex`. fn get_by_session_index(&self, session_index: SessionIndex) -> Option> { - let weak_ref = self.by_session_index.get(&session_index)?; - upgrade_report_dead(weak_ref) + self.by_session_index.get(&session_index) } /// Query needed information from runtime. @@ -168,7 +196,7 @@ impl SessionCache { ctx: &mut Context, parent: Hash, session_index: SessionIndex, - ) -> Result>> + ) -> Result> where Context: SubsystemContext, { @@ -201,11 +229,11 @@ impl SessionCache { }) .collect(); - let info = Rc::new(SessionInfo { + let info = SessionInfo { validator_groups, our_index, session_index, - }); + }; return Ok(Some(info)); } return Ok(None); From 72704ee0e2341416308edbbc450d8cd77aee887f Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 18 Feb 2021 12:50:43 +0100 Subject: [PATCH 19/60] Use proper lru cache. --- .../availability-distribution/Cargo.toml | 1 + .../availability-distribution/src/error.rs | 4 + .../src/fetch_task.rs | 72 ++++++++++------ .../src/session_cache.rs | 85 +++++++------------ .../availability-distribution/src/state.rs | 36 +++----- 5 files changed, 94 insertions(+), 104 deletions(-) diff --git a/node/network/availability-distribution/Cargo.toml b/node/network/availability-distribution/Cargo.toml index 03cd654d6f0a..7bbd73bdbfd6 100644 --- a/node/network/availability-distribution/Cargo.toml +++ b/node/network/availability-distribution/Cargo.toml @@ -21,6 +21,7 @@ sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "maste thiserror = "1.0.23" itertools = "0.10.0" rand = "0.8.3" +lru = "0.6.5" [dev-dependencies] polkadot-subsystem-testhelpers = { package = "polkadot-node-subsystem-test-helpers", path = "../../subsystem-test-helpers" } diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index c507d5835604..c6ccba4ab61c 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -59,6 +59,10 @@ pub enum Error { /// Spawning a running task failed. #[error("Spawning subsystem task failed")] SpawnTask(#[source] SubsystemError), + + /// Reporting bad validators failed. 
+ #[error("Reporting bad validators failed")] + ReportBadValidators(&'static str), } pub type Result = std::result::Result; diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index a73edc50a00f..d12e1c76c984 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -21,7 +21,7 @@ use std::rc::Rc; use futures::channel::mpsc; use futures::channel::oneshot; use futures::future::select; -use futures::{SinkExt, FutureExt}; +use futures::{FutureExt, SinkExt}; use polkadot_erasure_coding::branch_hash; use polkadot_node_network_protocol::request_response::{ @@ -43,7 +43,20 @@ use polkadot_subsystem::{ Subsystem, SubsystemContext, SubsystemError, SubsystemResult, }; -use super::{session_cache::{SessionInfo, BadValidators}, LOG_TARGET, error::{Error, Result}}; +use super::{ + error::{Error, Result}, + session_cache::{BadValidators, SessionInfo}, + LOG_TARGET, +}; + +/// Configuration for a `FetchTask` +/// +/// This exists to separate preparation of a `FetchTask` from actual starting it, which is +/// beneficial as this allows as for taking session info by reference. +pub struct FetchTaskConfig { + prepared_running: RunningTask, + live_in: HashSet, +} pub struct FetchTask { /// For what relay parents this task is relevant. @@ -56,9 +69,6 @@ pub struct FetchTask { /// We keep the task around in state `Fetched` until `live_in` becomes empty, to make /// sure we won't re-fetch an already fetched candidate. state: FetchedState, - - /// Session information. - session: Rc, } /// State of a particular candidate chunk fetching process. @@ -109,20 +119,17 @@ struct RunningTask { sender: mpsc::Sender, } -impl FetchTask { - /// Start fetching a chunk. - pub async fn start( - ctx: &mut Context, +impl FetchTaskConfig { + /// Create a new configuration for a [`FetchTask`]. + /// + /// The result of this function can be passed into [`FetchTask::start`]. + pub fn new( leaf: Hash, - core: OccupiedCore, - session_info: Rc, + core: &OccupiedCore, sender: mpsc::Sender, - ) -> Result - where - Context: SubsystemContext, - { - let (handle, kill) = oneshot::channel(); - let running = RunningTask { + session_info: &SessionInfo, + ) -> Self { + let prepared_running = RunningTask { session_index: session_info.session_index, group_index: core.group_responsible, group: session_info.validator_groups.get(core.group_responsible.0 as usize).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), @@ -134,13 +141,30 @@ impl FetchTask { relay_parent: core.candidate_descriptor.relay_parent, sender, }; - ctx.spawn("chunk-fetcher", running.run(kill).boxed()) + FetchTaskConfig { + live_in: vec![leaf].into_iter().collect(), + prepared_running, + } + } +} + +impl FetchTask { + /// Start fetching a chunk. + pub async fn start(config: FetchTaskConfig, ctx: &mut Context) -> Result + where + Context: SubsystemContext, + { + let FetchTaskConfig { + prepared_running, + live_in, + } = config; + let (handle, kill) = oneshot::channel(); + ctx.spawn("chunk-fetcher", prepared_running.run(kill).boxed()) .await .map_err(|e| Error::SpawnTask(e))?; Ok(FetchTask { - live_in: vec![leaf].into_iter().collect(), + live_in, state: FetchedState::Started(handle), - session: session_info, }) } @@ -151,8 +175,8 @@ impl FetchTask { /// Remove leaves and cancel the task, if it was the last one and the task has still been /// fetching. 
- pub fn remove_leaves(&mut self, leaves: HashSet) { - self.live_in.difference(&leaves); + pub fn remove_leaves(&mut self, leaves: &HashSet) { + self.live_in.difference(leaves); if self.live_in.is_empty() { self.state = FetchedState::Canceled } @@ -162,7 +186,7 @@ impl FetchTask { /// /// That is, it is either canceled, succeeded or failed. pub fn is_finished(&self) -> bool { - match self.state { + match &self.state { FetchedState::Canceled => true, FetchedState::Started(sender) => sender.is_canceled(), } @@ -303,7 +327,7 @@ impl RunningTask { /// Store given chunk and log any error. async fn store_chunk(&mut self, chunk: ErasureChunk) { let (tx, rx) = oneshot::channel(); - self.sender + let r = self.sender .send(FromFetchTask::Message(AllMessages::AvailabilityStore( AvailabilityStoreMessage::StoreChunk { candidate_hash: self.request.candidate_hash, diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 2ebc353f4fae..f403df2b0c09 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -66,6 +66,7 @@ pub struct SessionCache { } /// Localized session information, tailored for the needs of availability distribution. +#[derive(Clone)] pub struct SessionInfo { /// The index of this session. pub session_index: SessionIndex, @@ -118,8 +119,27 @@ impl SessionCache { where Context: SubsystemContext, { - let session_index = match self.session_index_cache.get(parent) { - Some(index) => index, + self.with_session_info(ctx, parent, Clone::clone).await + } + + /// Tries to retrieve `SessionInfo` and calls `with_info` if successful. + /// + /// If this node is not a validator, the function will return `None`. + /// + /// Use this function over `fetch_session_info` if all you need is a reference to + /// `SessionInfo`, as it avoids an expensive clone. + pub async fn with_session_info( + &mut self, + ctx: &mut Context, + parent: Hash, + with_info: F, + ) -> Result> + where + Context: SubsystemContext, + F: FnOnce(&SessionInfo) -> R + { + let session_index = match self.session_index_cache.get(&parent) { + Some(index) => *index, None => { let index = recv_runtime(request_session_index_for_child_ctx(parent, ctx).await) @@ -129,34 +149,23 @@ impl SessionCache { } }; - if let Some(info) = self.session_info_cache.get(session_index) { - return Ok(Some(info.clone())); + if let Some(info) = self.session_info_cache.get(&session_index) { + return Ok(Some(with_info(info))) } if let Some(info) = self .query_info_from_runtime(ctx, parent, session_index) .await? { - self.session_info_cache.put(session_index, info.clone()); - return Ok(Some(info)); + let r = with_info(&info); + self.session_info_cache.put(session_index, info); + return Ok(Some(r)); } Ok(None) } - pub async with_session_info( - &mut self, - ctx: &mut Context, - parent: Hash, - with_info: F, - ) -> R - where - Context: SubsystemContext, - F: Fn(info: &SessionInfo) -> R - { - } - /// Make sure we try unresponsive or misbehaving validators last. 
- pub fn report_bad(&mut self, report: BadValidators) -> Result<()> { + pub fn report_bad(&mut self, mut report: BadValidators) -> Result<()> { let session = self .session_info_cache .get_mut(&report.session_index) @@ -168,24 +177,10 @@ impl SessionCache { let bad_set = report.bad_validators.iter().collect::>(); // Put the bad boys last: group.retain(|v| !bad_set.contains(v)); - group.append(report.bad_validators); + group.append(&mut report.bad_validators); Ok(()) } - /// Get session info for a particular relay parent. - /// - /// Returns: None, if no entry for that relay parent exists in the cache (or it was dead - /// already - which should not happen.) - fn get_by_relay_parent(&self, parent: Hash) -> Option> { - let weak_ref = self.by_relay_parent.get(&parent)?; - upgrade_report_dead(weak_ref) - } - - /// Get session info for a given `SessionIndex`. - fn get_by_session_index(&self, session_index: SessionIndex) -> Option> { - self.by_session_index.get(&session_index) - } - /// Query needed information from runtime. /// /// We need to pass in the relay parent for our call to `request_session_info_ctx`. We should @@ -252,26 +247,4 @@ impl SessionCache { } None } - - /// Get rid of the dead bodies from time to time. - fn bury_dead(&mut self) { - self.by_session_index - .retain(|_, info| info.upgrade().is_some()); - self.by_relay_parent - .retain(|_, info| info.upgrade().is_some()); - } -} - -/// Upgrade a weak SessionInfo reference. -/// -/// Warn if it was dead already, as this should not happen. Cache should stay valid at least as -/// long as we need it. -fn upgrade_report_dead(info: &Weak) -> Option> { - match info.upgrade() { - Some(info) => Some(info), - None => { - tracing::warn!(LOG_TARGET, "A no longer cached session got requested, this should not happen in normal operation."); - None - } - } } diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index 76d90e3581a3..10e6ddfef473 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -82,7 +82,7 @@ use polkadot_subsystem::{ use super::{ error::recv_runtime, - fetch_task::{FetchTask, FromFetchTask}, + fetch_task::{FetchTask, FromFetchTask, FetchTaskConfig}, session_cache::SessionCache, Result, LOG_TARGET, }; @@ -138,20 +138,6 @@ impl ProtocolState { Ok(()) } - pub(crate) async fn advance(&mut self, ctx: &mut Context) -> Result<()> - where - Context: SubsystemContext, - { - match self.rx.next().await { - Some(FromFetchTask::Message(m)) => ctx.send_message(m).await, - Some(FromFetchTask::Concluded(Some(bad_boys))) => { - self.session_cache.report_bad(bad_boys)? - } - Some(FromFetchTask::Concluded(None)) => {} - } - Ok(()) - } - /// Start requesting chunks for newly imported heads. 
async fn start_requesting_chunks( &mut self, @@ -175,7 +161,7 @@ impl ProtocolState { fn stop_requesting_chunks(&mut self, obsolete_leaves: impl Iterator) { let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().collect(); self.fetches.retain(|&c_hash, task| { - task.remove_leaves(obsolete_leaves); + task.remove_leaves(&obsolete_leaves); task.is_live() }) } @@ -201,18 +187,20 @@ impl ProtocolState { Entry::Occupied(mut e) => // Just book keeping - we are already requesting that chunk: { - e.get_mut().add_leaf(leaf) + e.get_mut().add_leaf(leaf); } Entry::Vacant(e) => { - let session_info = self + let tx = self.tx.clone(); + let task_cfg = self .session_cache - .fetch_session_info(ctx, core.candidate_descriptor.relay_parent) + .with_session_info( + ctx, + core.candidate_descriptor.relay_parent, + |info| FetchTaskConfig::new(leaf, &core, tx, info), + ) .await?; - if let Some(session_info) = session_info { - e.insert( - FetchTask::start(ctx, leaf, core, session_info, self.tx.clone()) - .await?, - ); + if let Some(task_cfg) = task_cfg { + e.insert(FetchTask::start(task_cfg, ctx).await?); } // Not a validator, nothing to do. } From 60a2faf94574f0a119742d2ef78d5746b3ea9a90 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 18 Feb 2021 16:43:48 +0100 Subject: [PATCH 20/60] Requester finished. --- .../availability-distribution/src/error.rs | 4 ++ .../src/fetch_task.rs | 9 ++-- .../availability-distribution/src/lib.rs | 45 ++++++++++++++----- .../availability-distribution/src/state.rs | 39 +++++++++++++--- 4 files changed, 78 insertions(+), 19 deletions(-) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index c6ccba4ab61c..39a743c7ba24 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -63,6 +63,10 @@ pub enum Error { /// Reporting bad validators failed. #[error("Reporting bad validators failed")] ReportBadValidators(&'static str), + + /// Requester stream exhausted. + #[error("Erasure chunk requester stream exhausted")] + RequesterExhausted, } pub type Result = std::result::Result; diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/fetch_task.rs index d12e1c76c984..c2ba2c077010 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/fetch_task.rs @@ -255,7 +255,7 @@ impl RunningTask { self.store_chunk(chunk).await; break; } - self.conclude(bad_validators); + self.conclude(bad_validators).await; } /// Do request and return response, if successful. @@ -280,6 +280,7 @@ impl RunningTask { tracing::warn!( target: LOG_TARGET, origin= ?validator, + err= ?err, "Peer sent us invalid erasure chunk data" ); Err(TaskError::PeerError) @@ -288,11 +289,12 @@ impl RunningTask { tracing::warn!( target: LOG_TARGET, origin= ?validator, + err= ?err, "Some network error occurred when fetching erasure chunk" ); Err(TaskError::PeerError) } - Err(RequestError::Canceled(err)) => { + Err(RequestError::Canceled(oneshot::Canceled)) => { tracing::warn!(target: LOG_TARGET, origin= ?validator, "Erasure chunk request got canceled"); @@ -327,7 +329,8 @@ impl RunningTask { /// Store given chunk and log any error. 
async fn store_chunk(&mut self, chunk: ErasureChunk) { let (tx, rx) = oneshot::channel(); - let r = self.sender + let r = self + .sender .send(FromFetchTask::Message(AllMessages::AvailabilityStore( AvailabilityStoreMessage::StoreChunk { candidate_hash: self.request.candidate_hash, diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index a02bcc2a38f3..09c34742150c 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -14,14 +14,17 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . - -use futures::{FutureExt, TryFutureExt}; +use futures::{future::Either, FutureExt, StreamExt, TryFutureExt}; use sp_keystore::SyncCryptoStorePtr; use polkadot_subsystem::{ - jaeger, errors::{ChainApiError, RuntimeApiError}, PerLeafSpan, - ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, messages::AvailabilityDistributionMessage + errors::{ChainApiError, RuntimeApiError}, + jaeger, + messages::AllMessages, + messages::AvailabilityDistributionMessage, + ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, Subsystem, + SubsystemContext, SubsystemError, }; /// Error and [`Result`] type for this subsystem. @@ -42,8 +45,6 @@ mod session_cache; const LOG_TARGET: &'static str = "availability_distribution"; - - /// Availability Distribution metrics. /// TODO: Dummy for now. type Metrics = (); @@ -73,7 +74,6 @@ where } } - impl AvailabilityDistributionSubsystem { /// Create a new instance of the availability distribution. pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { @@ -85,13 +85,32 @@ impl AvailabilityDistributionSubsystem { where Context: SubsystemContext + Sync + Send, { - let mut state = ProtocolState::new(self.keystore.clone()); + let mut state = ProtocolState::new(self.keystore.clone()).fuse(); loop { - let message = ctx.recv().await?; + let action = { + let mut subsystem_next = ctx.recv().fuse(); + futures::select! { + subsystem_msg = subsystem_next => Either::Left(subsystem_msg), + from_task = state.next() => Either::Right(from_task), + } + }; + let message = match action { + Either::Left(subsystem_msg) => { + subsystem_msg.map_err(|e| Error::IncomingMessageChannel(e))? + } + Either::Right(from_task) => { + let from_task = from_task.ok_or(Error::RequesterExhausted)??; + ctx.send_message(from_task).await; + continue; + } + }; match message { FromOverseer::Signal(OverseerSignal::ActiveLeaves(update)) => { // Update the relay chain heads we are fetching our pieces for: - state.update_fetching_heads(&mut ctx, update).await?; + state + .get_mut() + .update_fetching_heads(&mut ctx, update) + .await?; } FromOverseer::Signal(OverseerSignal::BlockFinalized(..)) => {} FromOverseer::Signal(OverseerSignal::Conclude) => { @@ -106,8 +125,12 @@ impl AvailabilityDistributionSubsystem { "To be implemented, see: https://github.com/paritytech/polkadot/issues/2306!", ); } + FromOverseer::Communication { + msg: AvailabilityDistributionMessage::NetworkBridgeUpdateV1(_), + } => { + // There are currently no bridge updates we are interested in. 
+ } } } } } - diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/state.rs index 10e6ddfef473..5eeb92808ccd 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/state.rs @@ -58,11 +58,14 @@ use std::collections::{ hash_set::HashSet, }; use std::iter::IntoIterator; +use std::pin::Pin; use std::sync::Arc; -use futures::channel::{mpsc, oneshot}; -use futures::StreamExt; -use itertools::{Either, Itertools}; +use futures::{ + channel::{mpsc, oneshot}, + task::{Context, Poll}, + Stream, StreamExt, +}; use jaeger::JaegerSpan; use sp_keystore::SyncCryptoStorePtr; @@ -75,14 +78,14 @@ use polkadot_primitives::v1::{ use polkadot_subsystem::{ errors::{ChainApiError, RuntimeApiError}, jaeger, - messages::AvailabilityDistributionMessage, + messages::{AllMessages, AvailabilityDistributionMessage}, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, }; use super::{ error::recv_runtime, - fetch_task::{FetchTask, FromFetchTask, FetchTaskConfig}, + fetch_task::{FetchTask, FetchTaskConfig, FromFetchTask}, session_cache::SessionCache, Result, LOG_TARGET, }; @@ -210,6 +213,32 @@ impl ProtocolState { } } +impl Stream for ProtocolState { + type Item = Result; + + fn poll_next( + mut self: Pin<&mut Self>, + ctx: &mut Context, + ) -> Poll>> { + loop { + match Pin::new(&mut self.rx).poll_next(ctx) { + Poll::Ready(Some(FromFetchTask::Message(m))) => { + return Poll::Ready(Some(Ok(m))) + } + Poll::Ready(Some(FromFetchTask::Concluded(Some(bad_boys)))) => { + match self.session_cache.report_bad(bad_boys) { + Err(err) => return Poll::Ready(Some(Err(err))), + Ok(()) => continue, + } + } + Poll::Ready(Some(FromFetchTask::Concluded(None))) => continue, + Poll::Ready(None) => return Poll::Ready(None), + Poll::Pending => return Poll::Pending, + } + } + } +} + ///// Query all hashes and descriptors of candidates pending availability at a particular block. // #[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] async fn query_occupied_cores( From 452b55f2cd7582f10ebaa25bf4dbd7818dbc0dae Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 18 Feb 2021 18:42:52 +0100 Subject: [PATCH 21/60] ProtocolState -> Requester Also make sure to not fetch our own chunk. 
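
The `Stream` implementation shown above is the heart of this refactor: the requester owns an `mpsc::Receiver` fed by its fetch tasks, forwards `Message` items to the caller and consumes `Concluded` items internally. A minimal sketch of that pattern, with simplified stand-in types rather than the actual subsystem messages:

    use std::pin::Pin;
    use std::task::{Context, Poll};

    use futures::channel::mpsc;
    use futures::Stream;

    // Stand-ins for `AllMessages` and `Option<BadValidators>`.
    enum FromTask {
        Message(String),
        Concluded(Option<u32>),
    }

    struct Requester {
        rx: mpsc::Receiver<FromTask>,
        reported: Vec<u32>, // stand-in for session-cache book keeping
    }

    impl Stream for Requester {
        type Item = String;

        fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
            loop {
                match Pin::new(&mut self.rx).poll_next(cx) {
                    // Forward messages from fetch tasks to the subsystem main loop.
                    Poll::Ready(Some(FromTask::Message(m))) => return Poll::Ready(Some(m)),
                    // Book keeping stays internal; keep polling for the next item.
                    Poll::Ready(Some(FromTask::Concluded(Some(bad)))) => {
                        self.reported.push(bad);
                        continue;
                    }
                    Poll::Ready(Some(FromTask::Concluded(None))) => continue,
                    Poll::Ready(None) => return Poll::Ready(None),
                    Poll::Pending => return Poll::Pending,
                }
            }
        }
    }

Fusing this stream and selecting it against `ctx.recv()`, as the `run` loop above does, gives the subsystem a single loop that serves both overseer signals and fetch-task output.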
--- Cargo.lock | 8 +- .../availability-distribution/Cargo.toml | 1 - .../availability-distribution/src/lib.rs | 12 +-- .../src/{state.rs => requester.rs} | 77 ++++++------------- .../src/{ => requester}/fetch_task.rs | 46 +++++++---- .../src/session_cache.rs | 32 ++++++-- 6 files changed, 91 insertions(+), 85 deletions(-) rename node/network/availability-distribution/src/{state.rs => requester.rs} (64%) rename node/network/availability-distribution/src/{ => requester}/fetch_task.rs (93%) diff --git a/Cargo.lock b/Cargo.lock index 958f491b5adf..84d0aedb5c27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3458,9 +3458,9 @@ dependencies = [ [[package]] name = "lru" -version = "0.6.3" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3aae342b73d57ad0b8b364bd12584819f2c1fe9114285dfcf8b0722607671635" +checksum = "1f374d42cdfc1d7dbf3d3dec28afab2eb97ffbf43a3234d795b5986dbf4b90ba" dependencies = [ "hashbrown", ] @@ -5067,15 +5067,19 @@ version = "0.1.0" dependencies = [ "assert_matches", "futures 0.3.12", + "lru", "maplit", "parity-scale-codec", "polkadot-erasure-coding", + "polkadot-node-core-runtime-api", "polkadot-node-network-protocol", "polkadot-node-subsystem", "polkadot-node-subsystem-test-helpers", "polkadot-node-subsystem-util", "polkadot-primitives", + "rand 0.8.3", "sc-keystore", + "sc-network", "sp-application-crypto", "sp-core", "sp-keyring", diff --git a/node/network/availability-distribution/Cargo.toml b/node/network/availability-distribution/Cargo.toml index 7bbd73bdbfd6..5483d87deff7 100644 --- a/node/network/availability-distribution/Cargo.toml +++ b/node/network/availability-distribution/Cargo.toml @@ -19,7 +19,6 @@ sp-application-crypto = { git = "https://github.com/paritytech/substrate", branc sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", features = ["std"] } sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } thiserror = "1.0.23" -itertools = "0.10.0" rand = "0.8.3" lru = "0.6.5" diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 09c34742150c..46dcd74f7498 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -32,13 +32,9 @@ mod error; pub use error::Error; use error::Result; -/// The actual implementation of running availability distribution. -mod state; -/// State of a running availability-distribution subsystem. -use state::ProtocolState; - -/// A task fetching a particular chunk. -mod fetch_task; +/// `Requester` taking care of requesting chunks for candidates pending availability. +mod requester; +use requester::Requester; /// Cache for session information. 
mod session_cache; @@ -85,7 +81,7 @@ impl AvailabilityDistributionSubsystem { where Context: SubsystemContext + Sync + Send, { - let mut state = ProtocolState::new(self.keystore.clone()).fuse(); + let mut state = Requester::new(self.keystore.clone()).fuse(); loop { let action = { let mut subsystem_next = ctx.recv().fuse(); diff --git a/node/network/availability-distribution/src/state.rs b/node/network/availability-distribution/src/requester.rs similarity index 64% rename from node/network/availability-distribution/src/state.rs rename to node/network/availability-distribution/src/requester.rs index 5eeb92808ccd..58237f8deb9b 100644 --- a/node/network/availability-distribution/src/state.rs +++ b/node/network/availability-distribution/src/requester.rs @@ -14,44 +14,8 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -//! `ProtocolState` representing a running availability distribution subsystem. -//! -//! We keep track of [`FetchTask`]s, which get created on [`ActiveLeavesUpdate`]s for each occupied -//! core in the leaves, if we have not yet created it before. We keep track for which -//! relay parents a `FetchTask` is considered live (corresponding slot is occupied with the -//! candidate fetched). Once there is no relay parent left for which that task is considered live, -//! it gets removed. -//! -//! We keep that task around as long as its corresponding candidate is considered pending -//! availability, even if we fetched our chunk already. This is so we won't fetch our piece again, -//! just because the candidate is still pending availability in the next block. -//! -//! We are also dependent on session information. We need to know which validators are in a -//! particular validator group, backing our candidate, so we can request our erasure chunk from -//! them. -//! -//! We want to randomize the list of validators in each group, so we get a -//! random order of validators to try to get the chunk from. This is to ensure load balancing, each -//! requesting validator should have a different order, thus trying different validators. -//! -//! But We would like to keep that randomized order around for an entire session, so our particular -//! validator will always request from the same validators, thus making sure it will find an open -//! network connection on each request. -//! -//! (TODO: What to do on session boundaries? Initial delay acceptable? Connect with some fake -//! request to future validators? Use a peer set after all and connect that to the future session?) -//! -//! So we need to keep some customized session info around, which seems to be a good idea for -//! performance reasons anyway. That's where `SessionCache` comes into play. It is used to keep -//! session information around as long as we need it. But how long do we need it? How do we manage -//! that cache? We can't rely on `ActiveLeavesUpdate`s heads alone, as we might get occupied slots -//! for heads we never got an `ActiveLeavesUpdate` from, therefore we don't populate the session -//! cache with sessions our leaves correspond to, but directly with the sessions of the relay -//! parents of our `CandidateDescriptor`s. So, its clear how to populate the cache, but when can we -//! get rid of cached session information? If for sure is safe to do when there is no -//! candidate/FetchTask around anymore which references it. Thus the cache simply consists of -//! `Weak` pointers to the actual session infos and the `FetchTask`s keep `Rc`s, therefore we know -//! 
exactly when we can get rid of a cache entry by means of the Weak pointer evaluating to `None`. +//! Requester takes care of requesting erasure chunks for candidates that are pending +//! availability. use std::collections::{ hash_map::{Entry, HashMap}, @@ -83,21 +47,26 @@ use polkadot_subsystem::{ SubsystemContext, SubsystemError, }; -use super::{ - error::recv_runtime, - fetch_task::{FetchTask, FetchTaskConfig, FromFetchTask}, - session_cache::SessionCache, - Result, LOG_TARGET, -}; +use super::{error::recv_runtime, session_cache::SessionCache, Result, LOG_TARGET}; + +/// A task fetching a particular chunk. +mod fetch_task; +use fetch_task::{FetchTask, FetchTaskConfig, FromFetchTask}; -/// A running instance of this subsystem. -pub struct ProtocolState { +/// Requester takes care of requesting erasure chunks from backing groups and stores them in the +/// av store. +/// +/// It implements a stream that needs to be advanced for it making progress. +pub struct Requester { /// Candidates we need to fetch our chunk for. + /// + /// We keep those around as long as a candidate is pending availability on some leaf, so we + /// won't fetch chunks multiple times. fetches: HashMap, /// Localized information about sessions we are currently interested in. /// - /// This is usually the current one and at session boundaries also the last one. + /// This is the current one and the last one. session_cache: SessionCache, /// Sender to be cloned for `FetchTask`s. @@ -107,11 +76,15 @@ pub struct ProtocolState { rx: mpsc::Receiver, } -impl ProtocolState { +impl Requester { + /// Create a new `Requester`. + /// + /// You must feed it with `ActiveLeavesUpdate` via `update_fetching_heads` and make it progress + /// by advancing the stream. pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { // All we do is forwarding messages, no need to make this big. let (tx, rx) = mpsc::channel(1); - ProtocolState { + Requester { fetches: HashMap::new(), session_cache: SessionCache::new(keystore), tx, @@ -120,7 +93,7 @@ impl ProtocolState { } /// Update heads that need availability distribution. /// - /// For all active heads we will be fetching our chunk for availabilty distribution. + /// For all active heads we will be fetching our chunks for availabilty distribution. pub(crate) async fn update_fetching_heads( &mut self, ctx: &mut Context, @@ -159,8 +132,6 @@ impl ProtocolState { /// Stop requesting chunks for obsolete heads. /// - /// Returns relay_parents which became irrelevant for availability fetching (are not - /// referenced by any candidate anymore). 
fn stop_requesting_chunks(&mut self, obsolete_leaves: impl Iterator) { let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().collect(); self.fetches.retain(|&c_hash, task| { @@ -213,7 +184,7 @@ impl ProtocolState { } } -impl Stream for ProtocolState { +impl Stream for Requester { type Item = Result; fn poll_next( diff --git a/node/network/availability-distribution/src/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs similarity index 93% rename from node/network/availability-distribution/src/fetch_task.rs rename to node/network/availability-distribution/src/requester/fetch_task.rs index c2ba2c077010..6626316c455f 100644 --- a/node/network/availability-distribution/src/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -43,7 +43,7 @@ use polkadot_subsystem::{ Subsystem, SubsystemContext, SubsystemError, SubsystemResult, }; -use super::{ +use crate::{ error::{Error, Result}, session_cache::{BadValidators, SessionInfo}, LOG_TARGET, @@ -54,7 +54,7 @@ use super::{ /// This exists to separate preparation of a `FetchTask` from actual starting it, which is /// beneficial as this allows as for taking session info by reference. pub struct FetchTaskConfig { - prepared_running: RunningTask, + prepared_running: Option, live_in: HashSet, } @@ -129,7 +129,17 @@ impl FetchTaskConfig { sender: mpsc::Sender, session_info: &SessionInfo, ) -> Self { - let prepared_running = RunningTask { + let live_in = vec![leaf].into_iter().collect(); + + // Don't run tasks for our backing group: + if session_info.our_group == core.group_responsible { + return FetchTaskConfig { + live_in, + prepared_running: None, + }; + } + + let prepared_running = RunningTask { session_index: session_info.session_index, group_index: core.group_responsible, group: session_info.validator_groups.get(core.group_responsible.0 as usize).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), @@ -142,8 +152,8 @@ impl FetchTaskConfig { sender, }; FetchTaskConfig { - live_in: vec![leaf].into_iter().collect(), - prepared_running, + live_in, + prepared_running: Some(prepared_running), } } } @@ -158,14 +168,24 @@ impl FetchTask { prepared_running, live_in, } = config; - let (handle, kill) = oneshot::channel(); - ctx.spawn("chunk-fetcher", prepared_running.run(kill).boxed()) - .await - .map_err(|e| Error::SpawnTask(e))?; - Ok(FetchTask { - live_in, - state: FetchedState::Started(handle), - }) + + if let Some(running) = prepared_running { + let (handle, kill) = oneshot::channel(); + + ctx.spawn("chunk-fetcher", running.run(kill).boxed()) + .await + .map_err(|e| Error::SpawnTask(e))?; + + Ok(FetchTask { + live_in, + state: FetchedState::Started(handle), + }) + } else { + Ok(FetchTask { + live_in, + state: FetchedState::Canceled, + }) + } } /// Add the given leaf to the relay parents which are making this task relevant. diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index f403df2b0c09..fd4af299a5de 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -80,10 +80,9 @@ pub struct SessionInfo { /// Information about ourself: pub our_index: ValidatorIndex, - //// Remember to which group we blong, so we won't start fetching chunks for candidates we - //// backed our selves. 
- // TODO: Implement this: - // pub our_group: GroupIndex, + /// Remember to which group we belong, so we won't start fetching chunks for candidates those + /// candidates (We should have them via PoV distribution). + pub our_group: GroupIndex, } /// Report of bad validators. @@ -133,10 +132,10 @@ impl SessionCache { ctx: &mut Context, parent: Hash, with_info: F, - ) -> Result> - where + ) -> Result> + where Context: SubsystemContext, - F: FnOnce(&SessionInfo) -> R + F: FnOnce(&SessionInfo) -> R, { let session_index = match self.session_index_cache.get(&parent) { Some(index) => *index, @@ -150,7 +149,7 @@ impl SessionCache { }; if let Some(info) = self.session_info_cache.get(&session_index) { - return Ok(Some(with_info(info))) + return Ok(Some(with_info(info))); } if let Some(info) = self @@ -205,6 +204,22 @@ impl SessionCache { .ok_or(Error::NoSuchSession(session_index))?; if let Some(our_index) = self.get_our_index(validators).await { + // Get our group index: + let our_group = validator_groups + .iter() + .enumerate() + .find_map(|(i, g)| { + g.iter().find_map(|v| { + if *v == our_index { + Some(GroupIndex(i as u32)) + } else { + None + } + }) + }) + // TODO: Make sure this is correct and should be enforced: + .expect("Every validator should be in a validator group. qed."); + // Shuffle validators in groups: let mut rng = thread_rng(); for g in validator_groups.iter_mut() { @@ -228,6 +243,7 @@ impl SessionCache { validator_groups, our_index, session_index, + our_group, }; return Ok(Some(info)); } From 2b9b983126d13d2e15eec1e2da558680bf06c6a9 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 18 Feb 2021 22:23:11 +0100 Subject: [PATCH 22/60] Cleanup + fixes. --- .../availability-distribution/src/error.rs | 5 +---- .../availability-distribution/src/lib.rs | 8 ++------ .../src/requester.rs | 20 ++++++------------- .../src/requester/fetch_task.rs | 19 +++++++----------- .../src/session_cache.rs | 14 ++++--------- .../protocol/src/request_response/v1.rs | 2 ++ runtime/parachains/src/inclusion.rs | 6 +++--- 7 files changed, 25 insertions(+), 49 deletions(-) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index 39a743c7ba24..d442f7e00686 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -21,10 +21,7 @@ use futures::channel::oneshot; use polkadot_node_subsystem_util::Error as UtilError; use polkadot_primitives::v1::SessionIndex; -use polkadot_subsystem::{ - errors::{ChainApiError, RuntimeApiError}, - SubsystemError, -}; +use polkadot_subsystem::{errors::RuntimeApiError, SubsystemError}; #[derive(Debug, Error)] pub enum Error { diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 46dcd74f7498..c0a156a53134 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -19,12 +19,8 @@ use futures::{future::Either, FutureExt, StreamExt, TryFutureExt}; use sp_keystore::SyncCryptoStorePtr; use polkadot_subsystem::{ - errors::{ChainApiError, RuntimeApiError}, - jaeger, - messages::AllMessages, - messages::AvailabilityDistributionMessage, - ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, Subsystem, - SubsystemContext, SubsystemError, + messages::AvailabilityDistributionMessage, FromOverseer, OverseerSignal, SpawnedSubsystem, + Subsystem, SubsystemContext, SubsystemError, }; /// Error and 
[`Result`] type for this subsystem. diff --git a/node/network/availability-distribution/src/requester.rs b/node/network/availability-distribution/src/requester.rs index 58237f8deb9b..c1597bf295cc 100644 --- a/node/network/availability-distribution/src/requester.rs +++ b/node/network/availability-distribution/src/requester.rs @@ -26,28 +26,20 @@ use std::pin::Pin; use std::sync::Arc; use futures::{ - channel::{mpsc, oneshot}, + channel::mpsc, task::{Context, Poll}, - Stream, StreamExt, + Stream, }; -use jaeger::JaegerSpan; use sp_keystore::SyncCryptoStorePtr; use polkadot_node_subsystem_util::request_availability_cores_ctx; -use polkadot_primitives::v1::{ - BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, ErasureChunk, Hash, HashT, - OccupiedCore, SessionIndex, ValidatorId, ValidatorIndex, PARACHAIN_KEY_TYPE_ID, -}; +use polkadot_primitives::v1::{CandidateHash, CoreState, Hash, OccupiedCore}; use polkadot_subsystem::{ - errors::{ChainApiError, RuntimeApiError}, - jaeger, - messages::{AllMessages, AvailabilityDistributionMessage}, - ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, Subsystem, - SubsystemContext, SubsystemError, + messages::AllMessages, ActiveLeavesUpdate, JaegerSpan, SubsystemContext, }; -use super::{error::recv_runtime, session_cache::SessionCache, Result, LOG_TARGET}; +use super::{error::recv_runtime, session_cache::SessionCache, Result}; /// A task fetching a particular chunk. mod fetch_task; @@ -134,7 +126,7 @@ impl Requester { /// fn stop_requesting_chunks(&mut self, obsolete_leaves: impl Iterator) { let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().collect(); - self.fetches.retain(|&c_hash, task| { + self.fetches.retain(|_, task| { task.remove_leaves(&obsolete_leaves); task.is_live() }) diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 6626316c455f..9aa4ace7bc10 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -15,8 +15,6 @@ // along with Polkadot. If not, see . 
use std::collections::HashSet; -use std::pin::Pin; -use std::rc::Rc; use futures::channel::mpsc; use futures::channel::oneshot; @@ -29,19 +27,13 @@ use polkadot_node_network_protocol::request_response::{ v1::{AvailabilityFetchingRequest, AvailabilityFetchingResponse}, }; use polkadot_primitives::v1::{ - AuthorityDiscoveryId, BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, - ErasureChunk, GroupIndex, Hash, HashT, OccupiedCore, SessionIndex, ValidatorId, - ValidatorIndex, PARACHAIN_KEY_TYPE_ID, + AuthorityDiscoveryId, BlakeTwo256, ErasureChunk, GroupIndex, Hash, HashT, OccupiedCore, + SessionIndex, }; use polkadot_subsystem::messages::{ - AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage, - NetworkBridgeEvent, NetworkBridgeMessage, RuntimeApiMessage, RuntimeApiRequest, -}; -use polkadot_subsystem::{ - errors::{ChainApiError, RuntimeApiError}, - jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, - Subsystem, SubsystemContext, SubsystemError, SubsystemResult, + AllMessages, AvailabilityStoreMessage, NetworkBridgeMessage, }; +use polkadot_subsystem::SubsystemContext; use crate::{ error::{Error, Result}, @@ -360,6 +352,9 @@ impl RunningTask { }, ))) .await; + if let Err(err) = r { + tracing::error!(target: LOG_TARGET, err= ?err, "Storing erasure chunk failed, system shutting down?"); + } if let Err(oneshot::Canceled) = rx.await { tracing::error!(target: LOG_TARGET, "Storing erasure chunk failed"); diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index fd4af299a5de..cb2293fe8614 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -14,8 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use std::collections::{HashMap, HashSet}; -use std::rc::{Rc, Weak}; +use std::collections::HashSet; use lru::LruCache; use rand::{seq::SliceRandom, thread_rng}; @@ -29,18 +28,13 @@ use polkadot_node_subsystem_util::{ }; use polkadot_primitives::v1::SessionInfo as GlobalSessionInfo; use polkadot_primitives::v1::{ - AuthorityDiscoveryId, BlakeTwo256, CandidateDescriptor, CandidateHash, CoreState, - ErasureChunk, GroupIndex, Hash, HashT, SessionIndex, ValidatorId, ValidatorIndex, - PARACHAIN_KEY_TYPE_ID, -}; -use polkadot_subsystem::{ - jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, PerLeafSpan, SpawnedSubsystem, - Subsystem, SubsystemContext, SubsystemError, + AuthorityDiscoveryId, GroupIndex, Hash, SessionIndex, ValidatorId, ValidatorIndex, }; +use polkadot_subsystem::SubsystemContext; use super::{ error::{recv_runtime, Result}, - Error, LOG_TARGET, + Error, }; /// Caching of session info as needed by availability distribution. diff --git a/node/network/protocol/src/request_response/v1.rs b/node/network/protocol/src/request_response/v1.rs index 53e58f6b48b1..24e6363b963d 100644 --- a/node/network/protocol/src/request_response/v1.rs +++ b/node/network/protocol/src/request_response/v1.rs @@ -26,7 +26,9 @@ use super::Protocol; /// Request an availability chunk. #[derive(Debug, Copy, Clone, Encode, Decode)] pub struct AvailabilityFetchingRequest { + /// Hash of candidate we want a chunk for. pub candidate_hash: CandidateHash, + /// The index of the chunk to fetch. 
pub index: ValidatorIndex, } diff --git a/runtime/parachains/src/inclusion.rs b/runtime/parachains/src/inclusion.rs index 68a0b6f01f1e..7329ff2e65be 100644 --- a/runtime/parachains/src/inclusion.rs +++ b/runtime/parachains/src/inclusion.rs @@ -297,7 +297,7 @@ impl Module { Error::::UnoccupiedBitInBitfield, ); - let validator_public = &validators[signed_bitfield.validator_index() as usize]; + let validator_public = &validators[signed_bitfield.validator_index().0 as usize]; signed_bitfield.check_signature( &signing_context, @@ -319,7 +319,7 @@ impl Module { // defensive check - this is constructed by loading the availability bitfield record, // which is always `Some` if the core is occupied - that's why we're here. - let val_idx = signed_bitfield.validator_index() as usize; + let val_idx = signed_bitfield.validator_index().0 as usize; if let Some(mut bit) = pending_availability.as_mut() .and_then(|r| r.availability_votes.get_mut(val_idx)) { @@ -532,7 +532,7 @@ impl Module { &signing_context, group_vals.len(), |idx| group_vals.get(idx) - .and_then(|i| validators.get(*i as usize)) + .and_then(|i| validators.get(*i.0 as usize)) .map(|v| v.clone()), ); From d683f102b0a156c0f888541488d246140319f668 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 18 Feb 2021 22:24:40 +0100 Subject: [PATCH 23/60] Remove unused functions - FetchTask::is_finished - SessionCache::fetch_session_info --- .../src/requester/fetch_task.rs | 10 ---------- .../src/session_cache.rs | 15 --------------- 2 files changed, 25 deletions(-) diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 9aa4ace7bc10..f228965ba17a 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -194,16 +194,6 @@ impl FetchTask { } } - /// Whether or not this task can be considered finished. - /// - /// That is, it is either canceled, succeeded or failed. - pub fn is_finished(&self) -> bool { - match &self.state { - FetchedState::Canceled => true, - FetchedState::Started(sender) => sender.is_canceled(), - } - } - /// Whether or not there are still relay parents around with this candidate pending /// availability. pub fn is_live(&self) -> bool { diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index cb2293fe8614..111459a542e0 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -99,21 +99,6 @@ impl SessionCache { keystore, } } - /// Retrieve session info for the given relay parent. - /// - /// This function will query the cache first and will only query the runtime on cache miss. - /// - /// Returns: `Ok(None)` in case this node is not a validator in the current session. - pub async fn fetch_session_info( - &mut self, - ctx: &mut Context, - parent: Hash, - ) -> Result> - where - Context: SubsystemContext, - { - self.with_session_info(ctx, parent, Clone::clone).await - } /// Tries to retrieve `SessionInfo` and calls `with_info` if successful. /// From d7a8a312d1b53c91df9c22b6e6166748bd2ae825 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 08:31:13 +0100 Subject: [PATCH 24/60] availability-distribution responding side. 
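
The responding side added in this patch is deliberately small: look the requested chunk up in the availability store and answer with either the chunk or `NoSuchChunk`, so the requester can move on to the next validator in the backing group. A minimal sketch of that flow with simplified stand-in types (the real code queries the av-store subsystem rather than a map):

    use std::collections::HashMap;

    use futures::channel::oneshot;

    type CandidateHash = [u8; 32]; // stand-in
    type ValidatorIndex = u32;     // stand-in

    enum FetchingResponse {
        Chunk(Vec<u8>),
        NoSuchChunk,
    }

    struct IncomingRequest {
        candidate_hash: CandidateHash,
        index: ValidatorIndex,
        response_sender: oneshot::Sender<FetchingResponse>,
    }

    fn answer_request(
        store: &HashMap<(CandidateHash, ValidatorIndex), Vec<u8>>,
        req: IncomingRequest,
    ) -> Result<(), &'static str> {
        let response = match store.get(&(req.candidate_hash, req.index)) {
            Some(chunk) => FetchingResponse::Chunk(chunk.clone()),
            None => FetchingResponse::NoSuchChunk,
        };
        // Failure here just means the requesting side gave up in the meantime.
        req.response_sender
            .send(response)
            .map_err(|_| "requester hung up before the response was sent")
    }

Answering with an explicit `NoSuchChunk` instead of dropping the request lets the requester treat the peer as unhelpful for this candidate and try the next validator immediately.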
--- .../availability-distribution/src/error.rs | 4 ++ .../availability-distribution/src/lib.rs | 12 ++-- .../src/requester/fetch_task.rs | 9 ++- .../src/responder.rs | 65 +++++++++++++++++++ .../protocol/src/request_response/v1.rs | 3 + 5 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 node/network/availability-distribution/src/responder.rs diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index d442f7e00686..658bad97f3ce 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -64,6 +64,10 @@ pub enum Error { /// Requester stream exhausted. #[error("Erasure chunk requester stream exhausted")] RequesterExhausted, + + /// Sending response failed. + #[error("Sending a request's response failed.")] + SendResponse, } pub type Result = std::result::Result; diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index c0a156a53134..9ee9661affe0 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -32,6 +32,10 @@ use error::Result; mod requester; use requester::Requester; +/// Responding to erasure chunk requests: +mod responder; +use responder::answer_request; + /// Cache for session information. mod session_cache; @@ -109,13 +113,9 @@ impl AvailabilityDistributionSubsystem { return Ok(()); } FromOverseer::Communication { - msg: AvailabilityDistributionMessage::AvailabilityFetchingRequest(_), + msg: AvailabilityDistributionMessage::AvailabilityFetchingRequest(req), } => { - // TODO: Implement issue 2306: - tracing::warn!( - target: LOG_TARGET, - "To be implemented, see: https://github.com/paritytech/polkadot/issues/2306!", - ); + answer_request(&mut ctx, req).await? } FromOverseer::Communication { msg: AvailabilityDistributionMessage::NetworkBridgeUpdateV1(_), diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index f228965ba17a..501a30abce70 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -234,17 +234,22 @@ impl RunningTask { target: LOG_TARGET, "Node seems to be shutting down, canceling fetch task" ); - return; + return } Err(TaskError::PeerError) => { bad_validators.push(validator); - continue; + continue } }; let chunk = match resp { AvailabilityFetchingResponse::Chunk(resp) => { resp.reconstruct_erasure_chunk(&self.request) } + AvailabilityFetchingResponse::NoSuchChunk => { + tracing::debug!(target: LOG_TARGET, validator = ?validator, "Validator did not have our chunk"); + bad_validators.push(validator); + continue + } }; // Data genuine? diff --git a/node/network/availability-distribution/src/responder.rs b/node/network/availability-distribution/src/responder.rs new file mode 100644 index 000000000000..23ec112030df --- /dev/null +++ b/node/network/availability-distribution/src/responder.rs @@ -0,0 +1,65 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+ +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! Responder answers requests for availability chunks. + +use futures::channel::oneshot; + +use polkadot_node_network_protocol::request_response::{request::IncomingRequest, v1}; +use polkadot_primitives::v1::{CandidateHash, ErasureChunk, ValidatorIndex}; +use polkadot_subsystem::{ + messages::{AllMessages, AvailabilityStoreMessage}, + SubsystemContext, +}; + +use crate::error::{Error, Result}; +use crate::LOG_TARGET; + +/// Answer an incoming chunk request by querying the av store. +pub async fn answer_request( + ctx: &mut Context, + req: IncomingRequest, +) -> Result<()> +where + Context: SubsystemContext, +{ + let chunk = query_chunk(ctx, req.payload.candidate_hash, req.payload.index).await?; + + let response = match chunk { + None => v1::AvailabilityFetchingResponse::NoSuchChunk, + Some(chunk) => v1::AvailabilityFetchingResponse::Chunk(chunk.into()), + }; + + req.send_response(response).map_err(|_| Error::SendResponse) +} + +#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] +async fn query_chunk( + ctx: &mut Context, + candidate_hash: CandidateHash, + validator_index: ValidatorIndex, +) -> Result> +where + Context: SubsystemContext, +{ + let (tx, rx) = oneshot::channel(); + ctx.send_message(AllMessages::AvailabilityStore( + AvailabilityStoreMessage::QueryChunk(candidate_hash, validator_index, tx), + )) + .await; + + rx.await.map_err(|e| Error::QueryChunkResponseChannel(e)) +} diff --git a/node/network/protocol/src/request_response/v1.rs b/node/network/protocol/src/request_response/v1.rs index 24e6363b963d..06e4ea522086 100644 --- a/node/network/protocol/src/request_response/v1.rs +++ b/node/network/protocol/src/request_response/v1.rs @@ -38,6 +38,9 @@ pub enum AvailabilityFetchingResponse { /// The requested chunk data. #[codec(index = 0)] Chunk(ChunkResponse), + /// Node was not in possession of the requested chunk. + #[codec(index = 1)] + NoSuchChunk, } /// Skimmed down variant of `ErasureChunk`. From 3fed607083ddd4cfea1ef43f2e8a58d2662f8752 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 08:31:32 +0100 Subject: [PATCH 25/60] Cleanup + Fixes. 
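
Part of this cleanup is deriving the needed traits for the `ValidatorIndex` newtype in the primitives (shown below); the follow-up commit then migrates call sites from `index as usize` to `index.0 as usize`. A small sketch of that pattern, where the `validator_key` helper is hypothetical and only illustrates the call-site change:

    // Transparent wrapper: indexing into validator sets now goes through `.0`.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct ValidatorIndex(pub u32);

    // Hypothetical helper showing the call-site pattern used across the tree.
    fn validator_key(keys: &[String], index: ValidatorIndex) -> Option<&String> {
        keys.get(index.0 as usize)
    }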
--- node/network/availability-distribution/src/session_cache.rs | 2 +- primitives/src/v0.rs | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 111459a542e0..2b40c3db7b2d 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -90,7 +90,7 @@ pub struct BadValidators { } impl SessionCache { - pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { + pub fn new(keystore: SyncCryptoStorePtr) -> Self { SessionCache { // 5 relatively conservative, 1 to 2 should suffice: session_index_cache: LruCache::new(5), diff --git a/primitives/src/v0.rs b/primitives/src/v0.rs index ecb9f9cf3e53..c56c02c46641 100644 --- a/primitives/src/v0.rs +++ b/primitives/src/v0.rs @@ -114,9 +114,12 @@ impl MallocSizeOf for ValidatorId { } /// Index of the validator is used as a lightweight replacement of the `ValidatorId` when appropriate. +#[cfg(not(feature = "std"))] #[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Copy, Clone, Encode, Decode)] +pub struct ValidatorIndex(pub u32); + #[cfg(feature = "std")] -#[derive(Debug, MallocSizeOf)] +#[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Copy, Clone, Encode, Decode, Debug, MallocSizeOf)] pub struct ValidatorIndex(pub u32); // We should really get https://github.com/paritytech/polkadot/issues/2403 going .. From 39d6bc2b50563a638d23f7b762d5bad601cb7536 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 08:53:41 +0100 Subject: [PATCH 26/60] More fixes. --- node/core/av-store/src/lib.rs | 2 +- node/core/backing/src/lib.rs | 4 ++-- node/core/provisioner/src/lib.rs | 2 +- node/network/availability-recovery/src/lib.rs | 2 +- node/network/bitfield-distribution/src/lib.rs | 6 +++--- node/network/collator-protocol/src/collator_side.rs | 4 ++-- node/network/pov-distribution/src/lib.rs | 2 +- node/network/statement-distribution/src/lib.rs | 2 +- primitives/src/v0.rs | 2 +- runtime/parachains/src/inclusion.rs | 10 +++++----- runtime/parachains/src/reward_points.rs | 2 +- runtime/parachains/src/scheduler.rs | 2 +- 12 files changed, 20 insertions(+), 20 deletions(-) diff --git a/node/core/av-store/src/lib.rs b/node/core/av-store/src/lib.rs index 0b4806b3157b..66846f7ecc88 100644 --- a/node/core/av-store/src/lib.rs +++ b/node/core/av-store/src/lib.rs @@ -968,7 +968,7 @@ fn process_message( AvailabilityStoreMessage::QueryChunkAvailability(candidate, validator_index, tx) => { let a = load_meta(&subsystem.db, &candidate)? 
.map_or(false, |m| - *m.chunks_stored.get(validator_index as usize).as_deref().unwrap_or(&false) + *m.chunks_stored.get(validator_index.0 as usize).as_deref().unwrap_or(&false) ); let _ = tx.send(a); } diff --git a/node/core/backing/src/lib.rs b/node/core/backing/src/lib.rs index 5a8a78369585..495493132ce3 100644 --- a/node/core/backing/src/lib.rs +++ b/node/core/backing/src/lib.rs @@ -861,7 +861,7 @@ impl CandidateBackingJob { #[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))] fn check_statement_signature(&self, statement: &SignedFullStatement) -> Result<(), Error> { - let idx = statement.validator_index() as usize; + let idx = statement.validator_index().0 as usize; if self.table_context.validators.len() > idx { statement.check_signature( @@ -902,7 +902,7 @@ impl CandidateBackingJob { ) -> Option { self.insert_or_get_unbacked_span(parent_span, hash).map(|span| { let mut span = span.child("import-statement"); - span.add_string_tag("validator-index", &format!("{}", validator)); + span.add_string_tag("validator-index", &format!("{:?}", validator)); span }) } diff --git a/node/core/provisioner/src/lib.rs b/node/core/provisioner/src/lib.rs index 8f9421b25f9d..df24f9ee5caf 100644 --- a/node/core/provisioner/src/lib.rs +++ b/node/core/provisioner/src/lib.rs @@ -507,7 +507,7 @@ fn bitfields_indicate_availability( let availability_len = availability.len(); for bitfield in bitfields { - let validator_idx = bitfield.validator_index() as usize; + let validator_idx = bitfield.validator_index().0 as usize; match availability.get_mut(validator_idx) { None => { // in principle, this function might return a `Result` so that we can more clearly express this error condition diff --git a/node/network/availability-recovery/src/lib.rs b/node/network/availability-recovery/src/lib.rs index 6b28c7295f0d..e000b03c620c 100644 --- a/node/network/availability-recovery/src/lib.rs +++ b/node/network/availability-recovery/src/lib.rs @@ -165,7 +165,7 @@ impl Interaction { let (tx, rx) = oneshot::channel(); self.to_state.send(FromInteraction::MakeRequest( - self.validator_authority_keys[validator_index as usize].clone(), + self.validator_authority_keys[validator_index.0 as usize].clone(), self.candidate_hash.clone(), validator_index, tx, diff --git a/node/network/bitfield-distribution/src/lib.rs b/node/network/bitfield-distribution/src/lib.rs index 1029acd6bb24..a3af301d2ffc 100644 --- a/node/network/bitfield-distribution/src/lib.rs +++ b/node/network/bitfield-distribution/src/lib.rs @@ -284,7 +284,7 @@ where return; } - let validator_index = signed_availability.validator_index() as usize; + let validator_index = signed_availability.validator_index().0 as usize; let validator = if let Some(validator) = validator_set.get(validator_index) { validator.clone() } else { @@ -410,7 +410,7 @@ where span.add_string_tag("peer-id", &origin.to_base58()); span.add_string_tag( "claimed-validator", - &message.signed_availability.validator_index().to_string(), + &message.signed_availability.validator_index().0.to_string(), ); span }; @@ -429,7 +429,7 @@ where // Use the (untrusted) validator index provided by the signed payload // and see if that one actually signed the availability bitset. 
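Most of the churn in these hunks is mechanical fallout of `ValidatorIndex` becoming a newtype around `u32`: every site that used the index to address a slice now goes through `.0 as usize`, and untrusted indices keep being bounds-checked with `get` rather than indexed directly. A stand-alone sketch of the pattern follows; the local `ValidatorIndex` is re-declared only so the snippet compiles on its own.

```rust
// Illustrative copy of the newtype and the `validators[idx.0 as usize]`
// pattern used throughout the diffs above.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ValidatorIndex(pub u32);

fn validator_name(validators: &[&str], idx: ValidatorIndex) -> Option<String> {
    // Indices arriving from the network are untrusted, so bounds-check with
    // `get` instead of indexing directly.
    validators.get(idx.0 as usize).map(|v| v.to_string())
}

fn main() {
    let validators = ["alice", "bob", "charlie"];
    assert_eq!(validator_name(&validators, ValidatorIndex(1)), Some("bob".to_string()));
    assert_eq!(validator_name(&validators, ValidatorIndex(9)), None);
}
```

The wrapper costs nothing at runtime, but it keeps validator indices from being confused with other raw `u32` values such as group or core indices.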
let signing_context = job_data.signing_context.clone(); - let validator_index = message.signed_availability.validator_index() as usize; + let validator_index = message.signed_availability.validator_index().0 as usize; let validator = if let Some(validator) = validator_set.get(validator_index) { validator.clone() } else { diff --git a/node/network/collator-protocol/src/collator_side.rs b/node/network/collator-protocol/src/collator_side.rs index afd3bc1a4953..1689ed12c719 100644 --- a/node/network/collator-protocol/src/collator_side.rs +++ b/node/network/collator-protocol/src/collator_side.rs @@ -329,8 +329,8 @@ async fn determine_our_validators( let validators = request_validators_ctx(relay_parent, ctx).await?.await??; - let current_validators = current_validators.iter().map(|i| validators[*i as usize].clone()).collect(); - let next_validators = next_validators.iter().map(|i| validators[*i as usize].clone()).collect(); + let current_validators = current_validators.iter().map(|i| validators[i.0 as usize].clone()).collect(); + let next_validators = next_validators.iter().map(|i| validators[i.0 as usize].clone()).collect(); Ok((current_validators, next_validators)) } diff --git a/node/network/pov-distribution/src/lib.rs b/node/network/pov-distribution/src/lib.rs index 6527be99c3d5..d5043cb5b36a 100644 --- a/node/network/pov-distribution/src/lib.rs +++ b/node/network/pov-distribution/src/lib.rs @@ -336,7 +336,7 @@ async fn determine_validators_for_core( let validators = connect_to_validators .into_iter() - .map(|idx| validators[idx as usize].clone()) + .map(|idx| validators[idx.0 as usize].clone()) .collect(); Ok(Some(validators)) diff --git a/node/network/statement-distribution/src/lib.rs b/node/network/statement-distribution/src/lib.rs index 960ff129bacf..1eee14892ab7 100644 --- a/node/network/statement-distribution/src/lib.rs +++ b/node/network/statement-distribution/src/lib.rs @@ -494,7 +494,7 @@ fn check_statement_signature( parent_hash: relay_parent, }; - head.validators.get(statement.validator_index() as usize) + head.validators.get(statement.validator_index().0 as usize) .ok_or(()) .and_then(|v| statement.check_signature(&signing_context, v)) } diff --git a/primitives/src/v0.rs b/primitives/src/v0.rs index c56c02c46641..16e7bf88578d 100644 --- a/primitives/src/v0.rs +++ b/primitives/src/v0.rs @@ -670,7 +670,7 @@ pub struct AvailableData { } /// A chunk of erasure-encoded block data. -#[derive(PartialEq, Eq, Clone, Encode, Decode, Default)] +#[derive(PartialEq, Eq, Clone, Encode, Decode)] #[cfg_attr(feature = "std", derive(Serialize, Deserialize, Debug, Hash))] pub struct ErasureChunk { /// The erasure-encoded chunk of data belonging to the candidate block. 
diff --git a/runtime/parachains/src/inclusion.rs b/runtime/parachains/src/inclusion.rs index 7329ff2e65be..02d29aa2e1b3 100644 --- a/runtime/parachains/src/inclusion.rs +++ b/runtime/parachains/src/inclusion.rs @@ -288,7 +288,7 @@ impl Module { ); ensure!( - signed_bitfield.validator_index() < validators.len() as ValidatorIndex, + (signed_bitfield.validator_index().0 as usize) < validators.len(), Error::::ValidatorIndexOutOfBounds, ); @@ -532,7 +532,7 @@ impl Module { &signing_context, group_vals.len(), |idx| group_vals.get(idx) - .and_then(|i| validators.get(*i.0 as usize)) + .and_then(|i| validators.get(i.0 as usize)) .map(|v| v.clone()), ); @@ -551,7 +551,7 @@ impl Module { let val_idx = group_vals.get(bit_idx) .expect("this query done above; qed"); - backers.set(*val_idx as _, true); + backers.set(val_idx.0 as _, true); } } @@ -658,12 +658,12 @@ impl Module { T::RewardValidators::reward_backing(backers.iter().enumerate() .filter(|(_, backed)| **backed) - .map(|(i, _)| i as _) + .map(|(i, _)| ValidatorIndex(i as _)) ); T::RewardValidators::reward_bitfields(availability_votes.iter().enumerate() .filter(|(_, voted)| **voted) - .map(|(i, _)| i as _) + .map(|(i, _)| ValidatorIndex(i as _)) ); // initial weight is config read. diff --git a/runtime/parachains/src/reward_points.rs b/runtime/parachains/src/reward_points.rs index 7ff208d6d132..3fb8435e0916 100644 --- a/runtime/parachains/src/reward_points.rs +++ b/runtime/parachains/src/reward_points.rs @@ -38,7 +38,7 @@ fn reward_by_indices(points: u32, indices: I) where // and we are rewarding for behavior in current session. let validators = C::SessionInterface::validators(); let rewards = indices.into_iter() - .filter_map(|i| validators.get(i as usize).map(|v| v.clone())) + .filter_map(|i| validators.get(i.0 as usize).map(|v| v.clone())) .map(|v| (v, points)); >::reward_by_ids(rewards); diff --git a/runtime/parachains/src/scheduler.rs b/runtime/parachains/src/scheduler.rs index f21f6646d4cf..1436fca0dea3 100644 --- a/runtime/parachains/src/scheduler.rs +++ b/runtime/parachains/src/scheduler.rs @@ -263,7 +263,7 @@ impl Module { let mut shuffled_indices: Vec<_> = (0..validators.len()) .enumerate() - .map(|(i, _)| i as ValidatorIndex) + .map(|(i, _)| ValidatorIndex(i as _)) .collect(); shuffled_indices.shuffle(&mut rng); From 49b176469646e683f04a76f6b625fadab3877c92 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 09:21:29 +0100 Subject: [PATCH 27/60] More fixes. adder-collator is running! --- node/core/av-store/src/lib.rs | 8 ++++---- .../src/requester/fetch_task.rs | 2 +- node/network/availability-recovery/src/lib.rs | 6 +++--- node/network/protocol/src/request_response/v1.rs | 2 +- primitives/src/v0.rs | 10 +++------- 5 files changed, 12 insertions(+), 16 deletions(-) diff --git a/node/core/av-store/src/lib.rs b/node/core/av-store/src/lib.rs index 66846f7ecc88..2b29f9426689 100644 --- a/node/core/av-store/src/lib.rs +++ b/node/core/av-store/src/lib.rs @@ -1034,10 +1034,10 @@ fn store_chunk( None => return Ok(false), // we weren't informed of this candidate by import events. }; - match meta.chunks_stored.get(chunk.index as usize).map(|b| *b) { + match meta.chunks_stored.get(chunk.index.0 as usize).map(|b| *b) { Some(true) => return Ok(true), // already stored. 
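In the `inclusion.rs` hunks above, bitfields of votes are turned into reward lists by enumerating the bits and wrapping each set position into `ValidatorIndex`; the same enumerate-and-wrap pattern recurs in `scheduler.rs`. A self-contained sketch of that conversion (local `ValidatorIndex` and invented function name, mirroring the `reward_backing` / `reward_bitfields` call sites):

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ValidatorIndex(pub u32);

// Turn a bitfield of votes into the list of validator indices to reward.
fn voters(bits: &[bool]) -> Vec<ValidatorIndex> {
    bits.iter()
        .enumerate()
        .filter(|(_, voted)| **voted)
        .map(|(i, _)| ValidatorIndex(i as u32))
        .collect()
}

fn main() {
    let availability_votes = [true, false, true, true];
    assert_eq!(
        voters(&availability_votes),
        vec![ValidatorIndex(0), ValidatorIndex(2), ValidatorIndex(3)]
    );
}
```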
Some(false) => { - meta.chunks_stored.set(chunk.index as usize, true); + meta.chunks_stored.set(chunk.index.0 as usize, true); write_chunk(&mut tx, &candidate_hash, chunk.index, &chunk); write_meta(&mut tx, &candidate_hash, &meta); @@ -1090,7 +1090,7 @@ fn store_available_data( .map(|(index, (chunk, proof))| ErasureChunk { chunk: chunk.clone(), proof, - index: index as u32, + index: ValidatorIndex(index as u32), }); for chunk in erasure_chunks { @@ -1135,7 +1135,7 @@ fn prune_all(db: &Arc, clock: &dyn Clock) -> Result<(), Error> { // delete chunks. for (i, b) in meta.chunks_stored.iter().enumerate() { if *b { - delete_chunk(&mut tx, &candidate_hash, i as _); + delete_chunk(&mut tx, &candidate_hash, ValidatorIndex(i as _)); } } diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 501a30abce70..aee546da9b9b 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -312,7 +312,7 @@ impl RunningTask { fn validate_chunk(&self, validator: &AuthorityDiscoveryId, chunk: &ErasureChunk) -> bool { let anticipated_hash = - match branch_hash(&self.erasure_root, &chunk.proof, chunk.index as usize) { + match branch_hash(&self.erasure_root, &chunk.proof, chunk.index.0 as usize) { Ok(hash) => hash, Err(e) => { tracing::trace!( diff --git a/node/network/availability-recovery/src/lib.rs b/node/network/availability-recovery/src/lib.rs index e000b03c620c..5790f00b565c 100644 --- a/node/network/availability-recovery/src/lib.rs +++ b/node/network/availability-recovery/src/lib.rs @@ -195,7 +195,7 @@ impl Interaction { if let Ok(anticipated_hash) = branch_hash( &self.erasure_root, &chunk.proof, - chunk.index as usize, + chunk.index.0 as usize, ) { let erasure_chunk_hash = BlakeTwo256::hash(&chunk.chunk); @@ -269,7 +269,7 @@ impl Interaction { if self.received_chunks.len() >= self.threshold { let concluded = match polkadot_erasure_coding::reconstruct_v1( self.validators.len(), - self.received_chunks.values().map(|c| (&c.chunk[..], c.index as usize)), + self.received_chunks.values().map(|c| (&c.chunk[..], c.index.0 as usize)), ) { Ok(data) => { if reconstructed_data_matches_root(self.validators.len(), &self.erasure_root, &data) { @@ -423,7 +423,7 @@ async fn launch_interaction( let erasure_root = receipt.descriptor.erasure_root; let validators = session_info.validators.clone(); let validator_authority_keys = session_info.discovery_keys.clone(); - let mut shuffling: Vec<_> = (0..validators.len() as ValidatorIndex).collect(); + let mut shuffling: Vec<_> = (0..validators.len() as u32).map(ValidatorIndex).collect(); state.interactions.insert( candidate_hash.clone(), diff --git a/node/network/protocol/src/request_response/v1.rs b/node/network/protocol/src/request_response/v1.rs index 06e4ea522086..22724c1f44b7 100644 --- a/node/network/protocol/src/request_response/v1.rs +++ b/node/network/protocol/src/request_response/v1.rs @@ -69,7 +69,7 @@ impl ChunkResponse { ErasureChunk { chunk: self.chunk, proof: self.proof, - index: req.index.0, + index: req.index, } } } diff --git a/primitives/src/v0.rs b/primitives/src/v0.rs index 16e7bf88578d..4259b1269c15 100644 --- a/primitives/src/v0.rs +++ b/primitives/src/v0.rs @@ -114,12 +114,8 @@ impl MallocSizeOf for ValidatorId { } /// Index of the validator is used as a lightweight replacement of the `ValidatorId` when appropriate. 
-#[cfg(not(feature = "std"))] -#[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Copy, Clone, Encode, Decode)] -pub struct ValidatorIndex(pub u32); - -#[cfg(feature = "std")] -#[derive(Eq, Ord, PartialEq, PartialOrd, Hash, Copy, Clone, Encode, Decode, Debug, MallocSizeOf)] +#[derive(Eq, Ord, PartialEq, PartialOrd, Copy, Clone, Encode, Decode)] +#[cfg_attr(feature = "std", derive(Serialize, Deserialize, Debug, Hash, MallocSizeOf))] pub struct ValidatorIndex(pub u32); // We should really get https://github.com/paritytech/polkadot/issues/2403 going .. @@ -676,7 +672,7 @@ pub struct ErasureChunk { /// The erasure-encoded chunk of data belonging to the candidate block. pub chunk: Vec, /// The index of this erasure-encoded chunk of data. - pub index: u32, + pub index: ValidatorIndex, /// Proof for this chunk's branch in the Merkle tree. pub proof: Vec>, } From a1413301d5c2113f2f82697cd3aaedafdfe4ba9e Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 09:42:18 +0100 Subject: [PATCH 28/60] Some docs. --- node/network/availability-distribution/src/error.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index 658bad97f3ce..f66d418d8e35 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -15,6 +15,8 @@ // along with Polkadot. If not, see . // +//! Error handling related code and Error/Result definitions. + use thiserror::Error; use futures::channel::oneshot; @@ -23,6 +25,7 @@ use polkadot_node_subsystem_util::Error as UtilError; use polkadot_primitives::v1::SessionIndex; use polkadot_subsystem::{errors::RuntimeApiError, SubsystemError}; +/// Errors of this subsystem. #[derive(Debug, Error)] pub enum Error { #[error("Response channel to obtain StoreChunk failed")] From fad4586deaa827990abc077d46fd0333481c4bda Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 10:40:06 +0100 Subject: [PATCH 29/60] Docs. --- .../network/availability-distribution/src/lib.rs | 4 +++- .../src/requester/fetch_task.rs | 16 +++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 9ee9661affe0..ab68a899582b 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -45,7 +45,7 @@ const LOG_TARGET: &'static str = "availability_distribution"; /// TODO: Dummy for now. type Metrics = (); -/// The bitfield distribution subsystem. +/// The availability distribution subsystem. pub struct AvailabilityDistributionSubsystem { /// Pointer to a keystore, which is required for determining this nodes validator index. keystore: SyncCryptoStorePtr, @@ -90,6 +90,8 @@ impl AvailabilityDistributionSubsystem { from_task = state.next() => Either::Right(from_task), } }; + + // Handle task messages sending: let message = match action { Either::Left(subsystem_msg) => { subsystem_msg.map_err(|e| Error::IncomingMessageChannel(e))? 
diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index aee546da9b9b..7c21bfaf3a34 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -152,6 +152,8 @@ impl FetchTaskConfig { impl FetchTask { /// Start fetching a chunk. + /// + /// A task handling the fetching of the configured chunk will be spawned. pub async fn start(config: FetchTaskConfig, ctx: &mut Context) -> Result where Context: SubsystemContext, @@ -181,6 +183,8 @@ impl FetchTask { } /// Add the given leaf to the relay parents which are making this task relevant. + /// + /// This is for book keeping, so we know we are already fetching a chunk. pub fn add_leaf(&mut self, leaf: Hash) { self.live_in.insert(leaf); } @@ -189,7 +193,7 @@ impl FetchTask { /// fetching. pub fn remove_leaves(&mut self, leaves: &HashSet) { self.live_in.difference(leaves); - if self.live_in.is_empty() { + if self.live_in.is_empty() && !self.is_finished() { self.state = FetchedState::Canceled } } @@ -199,6 +203,16 @@ impl FetchTask { pub fn is_live(&self) -> bool { !self.live_in.is_empty() } + + /// Whether or not this task can be considered finished. + /// + /// That is, it is either canceled, succeeded or failed. + pub fn is_finished(&self) -> bool { + match &self.state { + FetchedState::Canceled => true, + FetchedState::Started(sender) => sender.is_canceled(), + } + } } /// Things that can go wrong in task execution. From e617e91bd6d9660c13342b3316e1539ce86912df Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 13:00:06 +0100 Subject: [PATCH 30/60] Fix reporting of bad guys. --- .../src/requester/fetch_task.rs | 2 +- .../availability-distribution/src/session_cache.rs | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 7c21bfaf3a34..f19dc98ba743 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -238,7 +238,7 @@ impl RunningTask { /// Try validators in backing group in order. async fn run_inner(mut self) { let mut bad_validators = Vec::new(); - // Try validators in order: + // Try validators in reverse order: while let Some(validator) = self.group.pop() { // Send request: let resp = match self.do_request(&validator).await { diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 2b40c3db7b2d..1a5c2e2eee02 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -143,6 +143,9 @@ impl SessionCache { } /// Make sure we try unresponsive or misbehaving validators last. + /// + /// We assume validators in a group are tried in reverse order, so the reported bad validators + /// will be put at the beginning of the group. 
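As the new doc comment above states, fetch tasks pop validators from the back of the backing group, so the bad-validator handling (implemented in `report_bad`, which follows) removes the reported validators from their current positions and prepends them, where `pop()` only reaches them last. A self-contained sketch of that reordering, with plain integers standing in for the real authority IDs:

```rust
use std::collections::HashSet;

// Move unresponsive validators to the front of the group so that a task
// popping from the back only falls back to them once everyone else failed.
fn deprioritize_bad(group: &mut Vec<u32>, bad: Vec<u32>) {
    let bad_set: HashSet<u32> = bad.iter().copied().collect();
    // Drop the bad validators from their current positions ...
    group.retain(|v| !bad_set.contains(v));
    // ... and prepend them, so `pop()` reaches them last.
    let mut reordered = bad;
    reordered.append(group);
    *group = reordered;
}

fn main() {
    let mut group = vec![10, 11, 12, 13];
    deprioritize_bad(&mut group, vec![11, 13]);
    assert_eq!(group, vec![11, 13, 10, 12]);
    // Popping now yields 12 and 10 first; 13 and 11 only as a last resort.
}
```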
pub fn report_bad(&mut self, mut report: BadValidators) -> Result<()> { let session = self .session_info_cache @@ -153,9 +156,14 @@ impl SessionCache { .get_mut(report.group_index.0 as usize) .ok_or(Error::ReportBadValidators("Validator group not found"))?; let bad_set = report.bad_validators.iter().collect::>(); - // Put the bad boys last: + + // Get rid of bad boys: group.retain(|v| !bad_set.contains(v)); - group.append(&mut report.bad_validators); + + // We are trying validators in reverse order, so bad ones should be first: + let mut new_group = report.bad_validators; + new_group.append(group); + *group = new_group; Ok(()) } From a4eef9b249d6a25dca52d41232ba5a4936038a5f Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 19:20:07 +0100 Subject: [PATCH 31/60] Fix tests --- node/core/bitfield-signing/src/lib.rs | 2 +- node/core/provisioner/src/tests.rs | 26 ++++---- .../src/session_cache.rs | 2 +- node/network/pov-distribution/src/tests.rs | 8 +-- .../network/statement-distribution/src/lib.rs | 60 +++++++++---------- runtime/parachains/src/inclusion.rs | 36 +++++------ 6 files changed, 67 insertions(+), 67 deletions(-) diff --git a/node/core/bitfield-signing/src/lib.rs b/node/core/bitfield-signing/src/lib.rs index f337db40914f..bd194742acbc 100644 --- a/node/core/bitfield-signing/src/lib.rs +++ b/node/core/bitfield-signing/src/lib.rs @@ -317,7 +317,7 @@ mod tests { block_on(async move { let (mut sender, mut receiver) = mpsc::channel(10); let relay_parent = Hash::default(); - let validator_index = 1u32; + let validator_index = ValidatorIndex(1u32); let future = construct_availability_bitfield( relay_parent, diff --git a/node/core/provisioner/src/tests.rs b/node/core/provisioner/src/tests.rs index 40a1c51e1ad0..8f26e6ee2f09 100644 --- a/node/core/provisioner/src/tests.rs +++ b/node/core/provisioner/src/tests.rs @@ -78,9 +78,9 @@ mod select_availability_bitfields { // we pass in three bitfields with two validators // this helps us check the postcondition that we get two bitfields back, for which the validators differ let bitfields = vec![ - block_on(signed_bitfield(&keystore, bitvec.clone(), 0)), - block_on(signed_bitfield(&keystore, bitvec.clone(), 1)), - block_on(signed_bitfield(&keystore, bitvec, 1)), + block_on(signed_bitfield(&keystore, bitvec.clone(), ValidatorIndex(0))), + block_on(signed_bitfield(&keystore, bitvec.clone(), ValidatorIndex(1))), + block_on(signed_bitfield(&keystore, bitvec, ValidatorIndex(1))), ]; let mut selected_bitfields = select_availability_bitfields(&cores, &bitfields); @@ -116,9 +116,9 @@ mod select_availability_bitfields { ]; let bitfields = vec![ - block_on(signed_bitfield(&keystore, bitvec0, 0)), - block_on(signed_bitfield(&keystore, bitvec1, 1)), - block_on(signed_bitfield(&keystore, bitvec2.clone(), 2)), + block_on(signed_bitfield(&keystore, bitvec0, ValidatorIndex(0))), + block_on(signed_bitfield(&keystore, bitvec1, ValidatorIndex(1))), + block_on(signed_bitfield(&keystore, bitvec2.clone(), ValidatorIndex(2))), ]; let selected_bitfields = select_availability_bitfields(&cores, &bitfields); @@ -140,8 +140,8 @@ mod select_availability_bitfields { let cores = vec![occupied_core(0), occupied_core(1)]; let bitfields = vec![ - block_on(signed_bitfield(&keystore, bitvec, 1)), - block_on(signed_bitfield(&keystore, bitvec1.clone(), 1)), + block_on(signed_bitfield(&keystore, bitvec, ValidatorIndex(1))), + block_on(signed_bitfield(&keystore, bitvec1.clone(), ValidatorIndex(1))), ]; let selected_bitfields = select_availability_bitfields(&cores, 
&bitfields); @@ -174,11 +174,11 @@ mod select_availability_bitfields { // these are out of order but will be selected in order. The better // bitfield for 3 will be selected. let bitfields = vec![ - block_on(signed_bitfield(&keystore, bitvec2.clone(), 3)), - block_on(signed_bitfield(&keystore, bitvec3.clone(), 3)), - block_on(signed_bitfield(&keystore, bitvec0.clone(), 0)), - block_on(signed_bitfield(&keystore, bitvec2.clone(), 2)), - block_on(signed_bitfield(&keystore, bitvec1.clone(), 1)), + block_on(signed_bitfield(&keystore, bitvec2.clone(), ValidatorIndex(3))), + block_on(signed_bitfield(&keystore, bitvec3.clone(), ValidatorIndex(3))), + block_on(signed_bitfield(&keystore, bitvec0.clone(), ValidatorIndex(0))), + block_on(signed_bitfield(&keystore, bitvec2.clone(), ValidatorIndex(2))), + block_on(signed_bitfield(&keystore, bitvec1.clone(), ValidatorIndex(1))), ]; let selected_bitfields = select_availability_bitfields(&cores, &bitfields); diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 1a5c2e2eee02..874671fbefcb 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -146,7 +146,7 @@ impl SessionCache { /// /// We assume validators in a group are tried in reverse order, so the reported bad validators /// will be put at the beginning of the group. - pub fn report_bad(&mut self, mut report: BadValidators) -> Result<()> { + pub fn report_bad(&mut self, report: BadValidators) -> Result<()> { let session = self .session_info_cache .get_mut(&report.session_index) diff --git a/node/network/pov-distribution/src/tests.rs b/node/network/pov-distribution/src/tests.rs index 8cf37dfad878..2dfc0ce11f96 100644 --- a/node/network/pov-distribution/src/tests.rs +++ b/node/network/pov-distribution/src/tests.rs @@ -174,7 +174,7 @@ impl Default for TestState { .take(validator_public.len()) .collect(); - let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]]; + let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]].into_iter().map(|g| g.map(ValidatorIndex)).collect(); let group_rotation_info = GroupRotationInfo { session_start_block: 0, group_rotation_frequency: 100, @@ -238,11 +238,11 @@ async fn test_validator_discovery( assert_eq!(index, session_index); let validators = validator_group.iter() - .map(|idx| validator_ids[*idx as usize].clone()) + .map(|idx| validator_ids[idx.0 as usize].clone()) .collect(); let discovery_keys = validator_group.iter() - .map(|idx| discovery_ids[*idx as usize].clone()) + .map(|idx| discovery_ids[idx.0 as usize].clone()) .collect(); tx.send(Ok(Some(SessionInfo { @@ -737,7 +737,7 @@ fn we_inform_peers_with_same_view_we_are_awaiting() { .take(validators.len()) .collect(); - let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]]; + let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]].map(|g| g.map(ValidatorIndex)).collect(); let group_rotation_info = GroupRotationInfo { session_start_block: 0, group_rotation_frequency: 100, diff --git a/node/network/statement-distribution/src/lib.rs b/node/network/statement-distribution/src/lib.rs index 7d79f54b5b89..fdeb7e0c00b2 100644 --- a/node/network/statement-distribution/src/lib.rs +++ b/node/network/statement-distribution/src/lib.rs @@ -1133,7 +1133,7 @@ mod tests { &keystore, Statement::Seconded(candidate_a.clone()), &signing_context, - 0, + ValidatorIndex(0), &alice_public.into(), )).expect("should be signed"); let noted = 
head_data.note_statement(a_seconded_val_0.clone()); @@ -1150,7 +1150,7 @@ mod tests { &keystore, Statement::Seconded(candidate_b.clone()), &signing_context, - 0, + ValidatorIndex(0), &alice_public.into(), )).expect("should be signed")); @@ -1161,7 +1161,7 @@ mod tests { &keystore, Statement::Seconded(candidate_c.clone()), &signing_context, - 0, + ValidatorIndex(0), &alice_public.into(), )).expect("should be signed")); @@ -1172,7 +1172,7 @@ mod tests { &keystore, Statement::Seconded(candidate_b.clone()), &signing_context, - 1, + ValidatorIndex(1), &bob_public.into(), )).expect("should be signed")); @@ -1183,7 +1183,7 @@ mod tests { &keystore, Statement::Seconded(candidate_c.clone()), &signing_context, - 1, + ValidatorIndex(1), &bob_public.into(), )).expect("should be signed")); @@ -1233,7 +1233,7 @@ mod tests { let hash_a = CandidateHash([1; 32].into()); // Sending an un-pinned statement should not work and should have no effect. - assert!(knowledge.send(&(CompactStatement::Valid(hash_a), 0)).is_none()); + assert!(knowledge.send(&(CompactStatement::Valid(hash_a), ValidatorIndex(0))).is_none()); assert!(!knowledge.known_candidates.contains(&hash_a)); assert!(knowledge.sent_statements.is_empty()); assert!(knowledge.received_statements.is_empty()); @@ -1241,8 +1241,8 @@ mod tests { assert!(knowledge.received_message_count.is_empty()); // Make the peer aware of the candidate. - assert_eq!(knowledge.send(&(CompactStatement::Candidate(hash_a), 0)), Some(true)); - assert_eq!(knowledge.send(&(CompactStatement::Candidate(hash_a), 1)), Some(false)); + assert_eq!(knowledge.send(&(CompactStatement::Candidate(hash_a), ValidatorIndex(0))), Some(true)); + assert_eq!(knowledge.send(&(CompactStatement::Candidate(hash_a), ValidatorIndex(1))), Some(false)); assert!(knowledge.known_candidates.contains(&hash_a)); assert_eq!(knowledge.sent_statements.len(), 2); assert!(knowledge.received_statements.is_empty()); @@ -1250,7 +1250,7 @@ mod tests { assert!(knowledge.received_message_count.get(&hash_a).is_none()); // And now it should accept the dependent message. - assert_eq!(knowledge.send(&(CompactStatement::Valid(hash_a), 0)), Some(false)); + assert_eq!(knowledge.send(&(CompactStatement::Valid(hash_a), ValidatorIndex(0))), Some(false)); assert!(knowledge.known_candidates.contains(&hash_a)); assert_eq!(knowledge.sent_statements.len(), 3); assert!(knowledge.received_statements.is_empty()); @@ -1263,8 +1263,8 @@ mod tests { let mut knowledge = PeerRelayParentKnowledge::default(); let hash_a = CandidateHash([1; 32].into()); - assert!(knowledge.receive(&(CompactStatement::Candidate(hash_a), 0), 3).unwrap()); - assert!(knowledge.send(&(CompactStatement::Candidate(hash_a), 0)).is_none()); + assert!(knowledge.receive(&(CompactStatement::Candidate(hash_a), ValidatorIndex(0)), 3).unwrap()); + assert!(knowledge.send(&(CompactStatement::Candidate(hash_a), ValidatorIndex(0))).is_none()); } #[test] @@ -1274,18 +1274,18 @@ mod tests { let hash_a = CandidateHash([1; 32].into()); assert_eq!( - knowledge.receive(&(CompactStatement::Valid(hash_a), 0), 3), + knowledge.receive(&(CompactStatement::Valid(hash_a), ValidatorIndex(0)), 3), Err(COST_UNEXPECTED_STATEMENT), ); assert_eq!( - knowledge.receive(&(CompactStatement::Candidate(hash_a), 0), 3), + knowledge.receive(&(CompactStatement::Candidate(hash_a), ValidatorIndex(0)), 3), Ok(true), ); // Push statements up to the flood limit. 
assert_eq!( - knowledge.receive(&(CompactStatement::Valid(hash_a), 1), 3), + knowledge.receive(&(CompactStatement::Valid(hash_a), ValidatorIndex(1)), 3), Ok(false), ); @@ -1293,14 +1293,14 @@ mod tests { assert_eq!(*knowledge.received_message_count.get(&hash_a).unwrap(), 2); assert_eq!( - knowledge.receive(&(CompactStatement::Valid(hash_a), 2), 3), + knowledge.receive(&(CompactStatement::Valid(hash_a), ValidatorIndex(2)), 3), Ok(false), ); assert_eq!(*knowledge.received_message_count.get(&hash_a).unwrap(), 3); assert_eq!( - knowledge.receive(&(CompactStatement::Valid(hash_a), 7), 3), + knowledge.receive(&(CompactStatement::Valid(hash_a), ValidatorIndex(7)), 3), Err(COST_APPARENT_FLOOD), ); @@ -1312,23 +1312,23 @@ mod tests { let hash_c = CandidateHash([3; 32].into()); assert_eq!( - knowledge.receive(&(CompactStatement::Candidate(hash_b), 0), 3), + knowledge.receive(&(CompactStatement::Candidate(hash_b), ValidatorIndex(0)), 3), Ok(true), ); assert_eq!( - knowledge.receive(&(CompactStatement::Candidate(hash_c), 0), 3), + knowledge.receive(&(CompactStatement::Candidate(hash_c), ValidatorIndex(0)), 3), Err(COST_UNEXPECTED_STATEMENT), ); // Last, make sure that already-known statements are disregarded. assert_eq!( - knowledge.receive(&(CompactStatement::Valid(hash_a), 2), 3), + knowledge.receive(&(CompactStatement::Valid(hash_a), ValidatorIndex(2)), 3), Err(COST_DUPLICATE_STATEMENT), ); assert_eq!( - knowledge.receive(&(CompactStatement::Candidate(hash_b), 0), 3), + knowledge.receive(&(CompactStatement::Candidate(hash_b), ValidatorIndex(0)), 3), Err(COST_DUPLICATE_STATEMENT), ); } @@ -1386,7 +1386,7 @@ mod tests { &keystore, Statement::Seconded(candidate.clone()), &signing_context, - 0, + ValidatorIndex(0), &alice_public.into(), )).expect("should be signed")); @@ -1396,7 +1396,7 @@ mod tests { &keystore, Statement::Valid(candidate_hash), &signing_context, - 1, + ValidatorIndex(1), &bob_public.into(), )).expect("should be signed")); @@ -1406,7 +1406,7 @@ mod tests { &keystore, Statement::Valid(candidate_hash), &signing_context, - 2, + ValidatorIndex(2), &charlie_public.into(), )).expect("should be signed")); @@ -1451,13 +1451,13 @@ mod tests { assert!(c_knowledge.known_candidates.contains(&candidate_hash)); assert!(c_knowledge.sent_statements.contains( - &(CompactStatement::Candidate(candidate_hash), 0) + &(CompactStatement::Candidate(candidate_hash), ValidatorIndex(0)) )); assert!(c_knowledge.sent_statements.contains( - &(CompactStatement::Valid(candidate_hash), 1) + &(CompactStatement::Valid(candidate_hash), ValidatorIndex(1)) )); assert!(c_knowledge.sent_statements.contains( - &(CompactStatement::Valid(candidate_hash), 2) + &(CompactStatement::Valid(candidate_hash), ValidatorIndex(2)) )); // now see if we got the 3 messages from the active head data. 
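The peer-knowledge tests above pin down the accounting rules behind these `(CompactStatement, ValidatorIndex)` fingerprints: statements about a candidate are only accepted once the peer has announced that candidate, duplicates are penalised, and only a bounded number of messages per candidate is accepted before it counts as an apparent flood. A schematic stand-in for the counting part of that logic, with invented names and a `u64` in place of the candidate hash (the real `PeerRelayParentKnowledge` tracks more, such as sent statements and duplicate detection):

```rust
use std::collections::HashMap;

#[derive(Debug, PartialEq)]
enum RejectReason {
    UnexpectedStatement, // candidate never announced by this peer
    ApparentFlood,       // per-candidate message limit exceeded
}

#[derive(Default)]
struct PeerKnowledge {
    // candidate hash stand-in -> number of messages received about it
    received_message_count: HashMap<u64, usize>,
}

impl PeerKnowledge {
    // Accept the candidate announcement itself; it counts as one received
    // message for that candidate.
    fn note_candidate(&mut self, candidate: u64) {
        self.received_message_count.entry(candidate).or_insert(1);
    }

    fn receive(&mut self, candidate: u64, limit: usize) -> Result<(), RejectReason> {
        match self.received_message_count.get_mut(&candidate) {
            None => Err(RejectReason::UnexpectedStatement),
            Some(count) if *count >= limit => Err(RejectReason::ApparentFlood),
            Some(count) => {
                *count += 1;
                Ok(())
            }
        }
    }
}

fn main() {
    let mut knowledge = PeerKnowledge::default();
    assert_eq!(knowledge.receive(1, 3), Err(RejectReason::UnexpectedStatement));
    knowledge.note_candidate(1);
    assert_eq!(knowledge.receive(1, 3), Ok(()));
    assert_eq!(knowledge.receive(1, 3), Ok(()));
    assert_eq!(knowledge.receive(1, 3), Err(RejectReason::ApparentFlood));
}
```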
@@ -1538,14 +1538,14 @@ mod tests { &keystore, Statement::Seconded(candidate), &signing_context, - 0, + ValidatorIndex(0), &alice_public.into(), ).await.expect("should be signed"); StoredStatement { comparator: StoredStatementComparator { compact: statement.payload().to_compact(), - validator_index: 0, + validator_index: ValidatorIndex(0), signature: statement.signature().clone() }, statement, @@ -1565,7 +1565,7 @@ mod tests { assert!(needs_dependents.contains(&peer_c)); } - let fingerprint = (statement.compact().clone(), 0); + let fingerprint = (statement.compact().clone(), ValidatorIndex(0)); assert!( peer_data.get(&peer_b).unwrap() @@ -1706,7 +1706,7 @@ mod tests { &keystore, Statement::Seconded(candidate), &signing_context, - 0, + ValidatorIndex(0), &alice_public.into(), ).await.expect("should be signed") }; diff --git a/runtime/parachains/src/inclusion.rs b/runtime/parachains/src/inclusion.rs index 7deaf32e73a7..1ed91b386817 100644 --- a/runtime/parachains/src/inclusion.rs +++ b/runtime/parachains/src/inclusion.rs @@ -991,7 +991,7 @@ mod tests { let candidate_hash = candidate.hash(); for (idx_in_group, val_idx) in group.iter().enumerate().take(signing) { - let key: Sr25519Keyring = validators[*val_idx as usize]; + let key: Sr25519Keyring = validators[val_idx.0 as usize]; *validator_indices.get_mut(idx_in_group).unwrap() = true; let signature = SignedStatement::sign( @@ -1020,7 +1020,7 @@ mod tests { &backed, signing_context, group.len(), - |i| Some(validators[group[i] as usize].public().into()), + |i| Some(validators[group[i].0 as usize].public().into()), ).ok().unwrap_or(0) * 2 > group.len(); if should_pass { @@ -1238,7 +1238,7 @@ mod tests { let signed = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield, &signing_context, )); @@ -1255,7 +1255,7 @@ mod tests { let signed = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield, &signing_context, )); @@ -1272,7 +1272,7 @@ mod tests { let signed_0 = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield.clone(), &signing_context, )); @@ -1280,7 +1280,7 @@ mod tests { let signed_1 = block_on(sign_bitfield( &keystore, &validators[1], - 1, + ValidatorIndex(1), bare_bitfield, &signing_context, )); @@ -1298,7 +1298,7 @@ mod tests { let signed = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield, &signing_context, )); @@ -1315,7 +1315,7 @@ mod tests { let signed = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield, &signing_context, )); @@ -1349,7 +1349,7 @@ mod tests { let signed = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield, &signing_context, )); @@ -1385,7 +1385,7 @@ mod tests { let signed = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield, &signing_context, )); @@ -1509,7 +1509,7 @@ mod tests { Some(block_on(sign_bitfield( &keystore, key, - i as ValidatorIndex, + ValidatorIndex(i as _), to_sign, &signing_context, ))) @@ -1547,18 +1547,18 @@ mod tests { let rewards = crate::mock::availability_rewards(); assert_eq!(rewards.len(), 4); - assert_eq!(rewards.get(&0).unwrap(), &1); - assert_eq!(rewards.get(&1).unwrap(), &1); - assert_eq!(rewards.get(&2).unwrap(), &1); - assert_eq!(rewards.get(&3).unwrap(), &1); + assert_eq!(rewards.get(&ValidatorIndex(0)).unwrap(), &1); + assert_eq!(rewards.get(&ValidatorIndex(1)).unwrap(), &1); + 
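The reward assertions above look entries up by `&ValidatorIndex(i)`, i.e. the newtype is used directly as a map key, which is one reason it keeps `Eq`, `Ord` and (with `std`) `Hash` among its derives. A tiny stand-alone illustration with a local copy of the newtype:

```rust
use std::collections::HashMap;

// Local copy for a self-contained example; the real type lives in
// `primitives/src/v0.rs`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct ValidatorIndex(pub u32);

fn main() {
    let mut rewards: HashMap<ValidatorIndex, u32> = HashMap::new();
    for i in 0u32..4 {
        *rewards.entry(ValidatorIndex(i)).or_insert(0) += 1;
    }
    assert_eq!(rewards.get(&ValidatorIndex(2)), Some(&1));
    assert!(rewards.get(&ValidatorIndex(7)).is_none());
}
```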
assert_eq!(rewards.get(&ValidatorIndex(2)).unwrap(), &1); + assert_eq!(rewards.get(&ValidatorIndex(3)).unwrap(), &1); } { let rewards = crate::mock::backing_rewards(); assert_eq!(rewards.len(), 2); - assert_eq!(rewards.get(&3).unwrap(), &1); - assert_eq!(rewards.get(&4).unwrap(), &1); + assert_eq!(rewards.get(&ValidatorIndex(3)).unwrap(), &1); + assert_eq!(rewards.get(&ValidatorIndex(4)).unwrap(), &1); } }); } @@ -1602,7 +1602,7 @@ mod tests { group_index if group_index == GroupIndex::from(1) => Some(vec![2, 3]), group_index if group_index == GroupIndex::from(2) => Some(vec![4]), _ => panic!("Group index out of bounds for 2 parachains and 1 parathread core"), - }; + }.map(|m| m.map(ValidatorIndex)); let thread_collator: CollatorId = Sr25519Keyring::Two.public().into(); From ea5f6a4dec66735f4b7f2bc1fd35beb36e22c79d Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 21:43:29 +0100 Subject: [PATCH 32/60] Make all tests compile. --- .../approval-voting/src/approval_checking.rs | 68 ++++++------ node/core/approval-voting/src/criteria.rs | 15 ++- node/core/approval-voting/src/import.rs | 3 +- node/core/approval-voting/src/tests.rs | 104 +++++++++--------- node/core/av-store/src/tests.rs | 26 ++--- node/core/backing/src/lib.rs | 41 +++---- .../approval-distribution/src/tests.rs | 16 +-- .../availability-recovery/src/tests.rs | 12 +- node/network/bitfield-distribution/src/lib.rs | 14 +-- .../collator-protocol/src/collator_side.rs | 11 +- node/network/pov-distribution/src/tests.rs | 6 +- runtime/parachains/src/inclusion.rs | 24 ++-- 12 files changed, 172 insertions(+), 168 deletions(-) diff --git a/node/core/approval-voting/src/approval_checking.rs b/node/core/approval-voting/src/approval_checking.rs index 90dbd6728cec..ec623a25bf99 100644 --- a/node/core/approval-voting/src/approval_checking.rs +++ b/node/core/approval-voting/src/approval_checking.rs @@ -348,7 +348,7 @@ pub fn tranches_to_approve( mod tests { use super::*; - use polkadot_primitives::v1::GroupIndex; + use polkadot_primitives::v1::{GroupIndex, ValidatorIndex}; use bitvec::bitvec; use bitvec::order::Lsb0 as BitOrderLsb0; @@ -393,7 +393,7 @@ mod tests { }.into(); for i in 0..6 { - candidate.mark_approval(i); + candidate.mark_approval(ValidatorIndex(i)); } let approval_entry = approval_db::v1::ApprovalEntry { @@ -406,7 +406,7 @@ mod tests { assert!(!check_approval(&candidate, &approval_entry, RequiredTranches::All)); - candidate.mark_approval(6); + candidate.mark_approval(ValidatorIndex(6)); assert!(check_approval(&candidate, &approval_entry, RequiredTranches::All)); } @@ -420,22 +420,22 @@ mod tests { }.into(); for i in 0..6 { - candidate.mark_approval(i); + candidate.mark_approval(ValidatorIndex(i)); } let approval_entry = approval_db::v1::ApprovalEntry { tranches: vec![ approval_db::v1::TrancheEntry { tranche: 0, - assignments: (0..4).map(|i| (i, 0.into())).collect(), + assignments: (0..4).map(|i| (ValidatorIndex(i), 0.into())).collect(), }, approval_db::v1::TrancheEntry { tranche: 1, - assignments: (4..6).map(|i| (i, 1.into())).collect(), + assignments: (4..6).map(|i| (ValidatorIndex(i), 1.into())).collect(), }, approval_db::v1::TrancheEntry { tranche: 2, - assignments: (6..10).map(|i| (i, 0.into())).collect(), + assignments: (6..10).map(|i| (ValidatorIndex(i), 0.into())).collect(), }, ], assignments: bitvec![BitOrderLsb0, u8; 1; 10], @@ -487,13 +487,13 @@ mod tests { approved: false, }.into(); - approval_entry.import_assignment(0, 0, block_tick); - approval_entry.import_assignment(0, 1, block_tick); + 
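The approval-checking tests above drive the entry through `import_assignment(tranche, validator_index, tick)` and then check per-validator assignment and approval bits. A rough, deliberately simplified sketch of that shape of bookkeeping — a `BTreeMap` stands in for the ordered tranche list, and the real `ApprovalEntry` also carries the backing group, its own assignment certificate and the approval state:

```rust
use std::collections::BTreeMap;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ValidatorIndex(pub u32);

type Tick = u64;
type DelayTranche = u32;

// Simplified: assignments grouped per delay tranche, remembering the tick at
// which each one was imported, plus one "assigned" bit per validator.
#[derive(Default)]
struct ApprovalEntry {
    tranches: BTreeMap<DelayTranche, Vec<(ValidatorIndex, Tick)>>,
    assignments: Vec<bool>,
}

impl ApprovalEntry {
    fn import_assignment(&mut self, tranche: DelayTranche, validator: ValidatorIndex, tick: Tick) {
        self.tranches.entry(tranche).or_default().push((validator, tick));
        if let Some(bit) = self.assignments.get_mut(validator.0 as usize) {
            *bit = true;
        }
    }

    fn is_assigned(&self, validator: ValidatorIndex) -> bool {
        self.assignments.get(validator.0 as usize).copied().unwrap_or(false)
    }
}

fn main() {
    let mut entry = ApprovalEntry::default();
    entry.assignments = vec![false; 5];
    entry.import_assignment(0, ValidatorIndex(0), 100);
    entry.import_assignment(1, ValidatorIndex(3), 101);
    assert!(entry.is_assigned(ValidatorIndex(0)));
    assert!(entry.is_assigned(ValidatorIndex(3)));
    assert!(!entry.is_assigned(ValidatorIndex(4)));
}
```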
approval_entry.import_assignment(0,ValidatorIndex(0), block_tick); + approval_entry.import_assignment(0,ValidatorIndex(1), block_tick); - approval_entry.import_assignment(1, 2, block_tick + 1); - approval_entry.import_assignment(1, 3, block_tick + 1); + approval_entry.import_assignment(1,ValidatorIndex(2), block_tick + 1); + approval_entry.import_assignment(1,ValidatorIndex(3), block_tick + 1); - approval_entry.import_assignment(2, 4, block_tick + 2); + approval_entry.import_assignment(2,ValidatorIndex(4), block_tick + 2); let approvals = bitvec![BitOrderLsb0, u8; 1; 5]; @@ -524,8 +524,8 @@ mod tests { approved: false, }.into(); - approval_entry.import_assignment(0, 0, block_tick); - approval_entry.import_assignment(1, 2, block_tick); + approval_entry.import_assignment(0, ValidatorIndex(0), block_tick); + approval_entry.import_assignment(1, ValidatorIndex(2), block_tick); let approvals = bitvec![BitOrderLsb0, u8; 0; 10]; @@ -562,10 +562,10 @@ mod tests { approved: false, }.into(); - approval_entry.import_assignment(0, 0, block_tick); - approval_entry.import_assignment(0, 1, block_tick); + approval_entry.import_assignment(0, ValidatorIndex(0), block_tick); + approval_entry.import_assignment(0, ValidatorIndex(1), block_tick); - approval_entry.import_assignment(1, 2, block_tick); + approval_entry.import_assignment(1, ValidatorIndex(2), block_tick); let mut approvals = bitvec![BitOrderLsb0, u8; 0; 10]; approvals.set(0, true); @@ -605,11 +605,11 @@ mod tests { approved: false, }.into(); - approval_entry.import_assignment(0, 0, block_tick); - approval_entry.import_assignment(0, 1, block_tick); + approval_entry.import_assignment(0, ValidatorIndex(0), block_tick); + approval_entry.import_assignment(0, ValidatorIndex(1), block_tick); - approval_entry.import_assignment(1, 2, block_tick); - approval_entry.import_assignment(1, 3, block_tick); + approval_entry.import_assignment(1, ValidatorIndex(2), block_tick); + approval_entry.import_assignment(1, ValidatorIndex(3), block_tick); let mut approvals = bitvec![BitOrderLsb0, u8; 0; n_validators]; approvals.set(0, true); @@ -670,14 +670,14 @@ mod tests { approved: false, }.into(); - approval_entry.import_assignment(0, 0, block_tick); - approval_entry.import_assignment(0, 1, block_tick); + approval_entry.import_assignment(0, ValidatorIndex(0), block_tick); + approval_entry.import_assignment(0, ValidatorIndex(1), block_tick); - approval_entry.import_assignment(1, 2, block_tick + 1); - approval_entry.import_assignment(1, 3, block_tick + 1); + approval_entry.import_assignment(1, ValidatorIndex(2), block_tick + 1); + approval_entry.import_assignment(1, ValidatorIndex(3), block_tick + 1); - approval_entry.import_assignment(2, 4, block_tick + no_show_duration + 2); - approval_entry.import_assignment(2, 5, block_tick + no_show_duration + 2); + approval_entry.import_assignment(2, ValidatorIndex(4), block_tick + no_show_duration + 2); + approval_entry.import_assignment(2, ValidatorIndex(5), block_tick + no_show_duration + 2); let mut approvals = bitvec![BitOrderLsb0, u8; 0; n_validators]; approvals.set(0, true); @@ -757,14 +757,14 @@ mod tests { approved: false, }.into(); - approval_entry.import_assignment(0, 0, block_tick); - approval_entry.import_assignment(0, 1, block_tick); + approval_entry.import_assignment(0, ValidatorIndex(0), block_tick); + approval_entry.import_assignment(0, ValidatorIndex(1), block_tick); - approval_entry.import_assignment(1, 2, block_tick + 1); - approval_entry.import_assignment(1, 3, block_tick + 1); + approval_entry.import_assignment(1, 
ValidatorIndex(2), block_tick + 1); + approval_entry.import_assignment(1, ValidatorIndex(3), block_tick + 1); - approval_entry.import_assignment(2, 4, block_tick + no_show_duration + 2); - approval_entry.import_assignment(2, 5, block_tick + no_show_duration + 2); + approval_entry.import_assignment(2, ValidatorIndex(4), block_tick + no_show_duration + 2); + approval_entry.import_assignment(2, ValidatorIndex(5), block_tick + no_show_duration + 2); let mut approvals = bitvec![BitOrderLsb0, u8; 0; n_validators]; approvals.set(0, true); @@ -813,7 +813,7 @@ mod tests { }, ); - approval_entry.import_assignment(3, 6, block_tick); + approval_entry.import_assignment(3, ValidatorIndex(6), block_tick); approvals.set(6, true); let tranche_now = no_show_duration as DelayTranche + 3; diff --git a/node/core/approval-voting/src/criteria.rs b/node/core/approval-voting/src/criteria.rs index 5880a8463c31..6a7544afe9d5 100644 --- a/node/core/approval-voting/src/criteria.rs +++ b/node/core/approval-voting/src/criteria.rs @@ -248,15 +248,14 @@ pub(crate) fn compute_assignments( ) -> HashMap { let (index, assignments_key): (ValidatorIndex, AssignmentPair) = { let key = config.assignment_keys.iter().enumerate() - .filter_map(|(i, p)| match keystore.key_pair(p) { + .find_map(|(i, p)| match keystore.key_pair(p) { Ok(pair) => Some((ValidatorIndex(i as _), pair)), Err(sc_keystore::Error::PairNotFound(_)) => None, Err(e) => { tracing::warn!(target: LOG_TARGET, "Encountered keystore error: {:?}", e); None } - }) - .next(); + }); match key { None => return Default::default(), @@ -535,7 +534,7 @@ mod tests { (0..n_groups).map(|i| { (i * size .. (i + 1) *size) .chain(if i < big_groups { Some(scraps + i) } else { None }) - .map(|j| j as ValidatorIndex) + .map(|j| ValidatorIndex(j as _)) .collect::>() }).collect() } @@ -565,7 +564,7 @@ mod tests { Sr25519Keyring::Bob, Sr25519Keyring::Charlie, ]), - validator_groups: vec![vec![0], vec![1, 2]], + validator_groups: vec![vec![ValidatorIndex(0)], vec![ValidatorIndex(1), ValidatorIndex(2)]], n_cores: 2, zeroth_delay_tranche_width: 10, relay_vrf_modulo_samples: 3, @@ -596,7 +595,7 @@ mod tests { Sr25519Keyring::Bob, Sr25519Keyring::Charlie, ]), - validator_groups: vec![vec![0], vec![1, 2]], + validator_groups: vec![vec![ValidatorIndex(0)], vec![ValidatorIndex(1), ValidatorIndex(2)]], n_cores: 2, zeroth_delay_tranche_width: 10, relay_vrf_modulo_samples: 3, @@ -660,7 +659,7 @@ mod tests { group: group_for_core(core.0 as _), cert: assignment.cert, own_group: GroupIndex(0), - val_index: 0, + val_index: ValidatorIndex(0), config: config.clone(), }; @@ -710,7 +709,7 @@ mod tests { #[test] fn check_rejects_nonexistent_key() { check_mutated_assignments(200, 100, 25, |m| { - m.val_index += 200; + m.val_index.0 += 200; Some(false) }); } diff --git a/node/core/approval-voting/src/import.rs b/node/core/approval-voting/src/import.rs index 224d9ca310ef..541c8ff4273d 100644 --- a/node/core/approval-voting/src/import.rs +++ b/node/core/approval-voting/src/import.rs @@ -692,6 +692,7 @@ mod tests { use super::*; use polkadot_node_subsystem_test_helpers::make_subsystem_context; use polkadot_node_primitives::approval::{VRFOutput, VRFProof}; + use polkadot_primitives::v1::ValidatorIndex; use polkadot_subsystem::messages::AllMessages; use sp_core::testing::TaskExecutor; use sp_runtime::{Digest, DigestItem}; @@ -1546,7 +1547,7 @@ mod tests { validators: vec![Sr25519Keyring::Alice.public().into(); 6], discovery_keys: Vec::new(), assignment_keys: Vec::new(), - validator_groups: vec![vec![0; 5], vec![0; 
2]], + validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(5)], vec![ValidatorIndex(0), ValidatorIndex(2)]], n_cores: 6, needed_approvals: 2, zeroth_delay_tranche_width: irrelevant, diff --git a/node/core/approval-voting/src/tests.rs b/node/core/approval-voting/src/tests.rs index 7c8c9f3d94fe..457bbeb6b6a7 100644 --- a/node/core/approval-voting/src/tests.rs +++ b/node/core/approval-voting/src/tests.rs @@ -243,7 +243,7 @@ impl Default for StateConfig { slot: Slot::from(0), tick: 0, validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob], - validator_groups: vec![vec![0], vec![1]], + validator_groups: vec![vec![ValidatorIndex(0)], vec![ValidatorIndex(1)]], needed_approvals: 1, no_show_slots: 2, } @@ -364,7 +364,7 @@ fn rejects_bad_assignment() { let block_hash = Hash::repeat_byte(0x01); let assignment_good = IndirectAssignmentCert { block_hash, - validator: 0, + validator: ValidatorIndex(0), cert: garbage_assignment_cert( AssignmentCertKind::RelayVRFModulo { sample: 0, @@ -386,7 +386,7 @@ fn rejects_bad_assignment() { // unknown hash let assignment = IndirectAssignmentCert { block_hash: Hash::repeat_byte(0x02), - validator: 0, + validator: ValidatorIndex(0), cert: garbage_assignment_cert( AssignmentCertKind::RelayVRFModulo { sample: 0, @@ -423,7 +423,7 @@ fn rejects_assignment_in_future() { let candidate_index = 0; let assignment = IndirectAssignmentCert { block_hash, - validator: 0, + validator: ValidatorIndex(0), cert: garbage_assignment_cert( AssignmentCertKind::RelayVRFModulo { sample: 0, @@ -467,7 +467,7 @@ fn rejects_assignment_with_unknown_candidate() { let candidate_index = 1; let assignment = IndirectAssignmentCert { block_hash, - validator: 0, + validator: ValidatorIndex(0), cert: garbage_assignment_cert( AssignmentCertKind::RelayVRFModulo { sample: 0, @@ -493,7 +493,7 @@ fn assignment_import_updates_candidate_entry_and_schedules_wakeup() { let candidate_index = 0; let assignment = IndirectAssignmentCert { block_hash, - validator: 0, + validator: ValidatorIndex(0), cert: garbage_assignment_cert( AssignmentCertKind::RelayVRFModulo { sample: 0, @@ -534,7 +534,7 @@ fn assignment_import_updates_candidate_entry_and_schedules_wakeup() { actions.get(1).unwrap(), Action::WriteCandidateEntry(c, e) => { assert_eq!(c, &candidate_hash); - assert!(e.approval_entry(&block_hash).unwrap().is_assigned(0)); + assert!(e.approval_entry(&block_hash).unwrap().is_assigned(ValidatorIndex(0))); } ); } @@ -554,7 +554,7 @@ fn rejects_approval_before_assignment() { let vote = IndirectSignedApprovalVote { block_hash, candidate_index: 0, - validator: 0, + validator: ValidatorIndex(0), signature: sign_approval(Sr25519Keyring::Alice, candidate_hash, 1), }; @@ -583,7 +583,7 @@ fn rejects_approval_if_no_candidate_entry() { let vote = IndirectSignedApprovalVote { block_hash, candidate_index: 0, - validator: 0, + validator: ValidatorIndex(0), signature: sign_approval(Sr25519Keyring::Alice, candidate_hash, 1), }; @@ -603,7 +603,7 @@ fn rejects_approval_if_no_candidate_entry() { fn rejects_approval_if_no_block_entry() { let block_hash = Hash::repeat_byte(0x01); let candidate_hash = CandidateHash(Hash::repeat_byte(0xCC)); - let validator_index = 0; + let validator_index = ValidatorIndex(0); let mut state = State { assignment_criteria: Box::new(MockAssignmentCriteria::check_only(|| { @@ -615,7 +615,7 @@ fn rejects_approval_if_no_block_entry() { let vote = IndirectSignedApprovalVote { block_hash, candidate_index: 0, - validator: 0, + validator: ValidatorIndex(0), signature: 
sign_approval(Sr25519Keyring::Alice, candidate_hash, 1), }; @@ -640,7 +640,7 @@ fn rejects_approval_if_no_block_entry() { fn accepts_and_imports_approval_after_assignment() { let block_hash = Hash::repeat_byte(0x01); let candidate_hash = CandidateHash(Hash::repeat_byte(0xCC)); - let validator_index = 0; + let validator_index = ValidatorIndex(0); let candidate_index = 0; let mut state = State { @@ -649,7 +649,7 @@ fn accepts_and_imports_approval_after_assignment() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob, Sr25519Keyring::Charlie], - validator_groups: vec![vec![0, 1], vec![2]], + validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(1)], vec![ValidatorIndex(2)]], needed_approvals: 2, ..Default::default() }) @@ -680,7 +680,7 @@ fn accepts_and_imports_approval_after_assignment() { actions.get(0).unwrap(), Action::WriteCandidateEntry(c_hash, c_entry) => { assert_eq!(c_hash, &candidate_hash); - assert!(c_entry.approvals().get(validator_index as usize).unwrap()); + assert!(c_entry.approvals().get(validator_index.0 as usize).unwrap()); assert!(!c_entry.approval_entry(&block_hash).unwrap().is_approved()); } ); @@ -690,7 +690,7 @@ fn accepts_and_imports_approval_after_assignment() { fn second_approval_import_is_no_op() { let block_hash = Hash::repeat_byte(0x01); let candidate_hash = CandidateHash(Hash::repeat_byte(0xCC)); - let validator_index = 0; + let validator_index = ValidatorIndex(0); let candidate_index = 0; let mut state = State { @@ -699,7 +699,7 @@ fn second_approval_import_is_no_op() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob, Sr25519Keyring::Charlie], - validator_groups: vec![vec![0, 1], vec![2]], + validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(1)], vec![ValidatorIndex(2)]], needed_approvals: 2, ..Default::default() }) @@ -734,8 +734,8 @@ fn second_approval_import_is_no_op() { fn check_and_apply_full_approval_sets_flag_and_bit() { let block_hash = Hash::repeat_byte(0x01); let candidate_hash = CandidateHash(Hash::repeat_byte(0xCC)); - let validator_index_a = 0; - let validator_index_b = 1; + let validator_index_a = ValidatorIndex(0); + let validator_index_b = ValidatorIndex(1); let mut state = State { assignment_criteria: Box::new(MockAssignmentCriteria::check_only(|| { @@ -743,7 +743,7 @@ fn check_and_apply_full_approval_sets_flag_and_bit() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob, Sr25519Keyring::Charlie], - validator_groups: vec![vec![0, 1], vec![2]], + validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(1)], vec![ValidatorIndex(2)]], needed_approvals: 2, ..Default::default() }) @@ -795,8 +795,8 @@ fn check_and_apply_full_approval_sets_flag_and_bit() { fn check_and_apply_full_approval_does_not_load_cached_block_from_db() { let block_hash = Hash::repeat_byte(0x01); let candidate_hash = CandidateHash(Hash::repeat_byte(0xCC)); - let validator_index_a = 0; - let validator_index_b = 1; + let validator_index_a = ValidatorIndex(0); + let validator_index_b = ValidatorIndex(1); let mut state = State { assignment_criteria: Box::new(MockAssignmentCriteria::check_only(|| { @@ -804,7 +804,7 @@ fn check_and_apply_full_approval_does_not_load_cached_block_from_db() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob, Sr25519Keyring::Charlie], - validator_groups: vec![vec![0, 1], vec![2]], + validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(1)], 
vec![ValidatorIndex(2)]], needed_approvals: 2, ..Default::default() }) @@ -867,7 +867,7 @@ fn assignment_triggered_by_all_with_less_than_supermajority() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: 1, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: false, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -886,15 +886,15 @@ fn assignment_triggered_by_all_with_less_than_supermajority() { candidate_entry .approval_entry_mut(&block_hash) .unwrap() - .import_assignment(0, 0, 0); + .import_assignment(0, ValidatorIndex(0), 0); candidate_entry .approval_entry_mut(&block_hash) .unwrap() - .import_assignment(0, 1, 0); + .import_assignment(0, ValidatorIndex(1), 0); - candidate_entry.mark_approval(0); - candidate_entry.mark_approval(1); + candidate_entry.mark_approval(ValidatorIndex(0)); + candidate_entry.mark_approval(ValidatorIndex(1)); let tranche_now = 1; assert!(should_trigger_assignment( @@ -918,7 +918,7 @@ fn assignment_not_triggered_by_all_with_supermajority() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: 1, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: false, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -937,21 +937,21 @@ fn assignment_not_triggered_by_all_with_supermajority() { candidate_entry .approval_entry_mut(&block_hash) .unwrap() - .import_assignment(0, 0, 0); + .import_assignment(0, ValidatorIndex(0), 0); candidate_entry .approval_entry_mut(&block_hash) .unwrap() - .import_assignment(0, 1, 0); + .import_assignment(0, ValidatorIndex(1), 0); candidate_entry .approval_entry_mut(&block_hash) .unwrap() - .import_assignment(0, 2, 0); + .import_assignment(0, ValidatorIndex(2), 0); - candidate_entry.mark_approval(0); - candidate_entry.mark_approval(1); - candidate_entry.mark_approval(2); + candidate_entry.mark_approval(ValidatorIndex(0)); + candidate_entry.mark_approval(ValidatorIndex(1)); + candidate_entry.mark_approval(ValidatorIndex(2)); let tranche_now = 1; assert!(!should_trigger_assignment( @@ -975,7 +975,7 @@ fn assignment_not_triggered_if_already_triggered() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: 1, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: true, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -1012,7 +1012,7 @@ fn assignment_not_triggered_by_exact() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: 1, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: false, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -1050,7 +1050,7 @@ fn assignment_not_triggered_more_than_maximum() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: maximum_broadcast + 1, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: false, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -1093,7 +1093,7 @@ fn assignment_triggered_if_at_maximum() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: maximum_broadcast, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: false, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -1136,7 +1136,7 @@ fn assignment_not_triggered_if_at_maximum_but_clock_is_before() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: maximum_broadcast, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: false, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -1179,7 +1179,7 @@ fn assignment_not_triggered_if_at_maximum_but_clock_is_before_with_drift() { 
AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: maximum_broadcast, - validator_index: 4, + validator_index: ValidatorIndex(4), triggered: false, }), assignments: bitvec::bitvec![BitOrderLsb0, u8; 0; 4], @@ -1259,8 +1259,8 @@ fn block_not_approved_until_all_candidates_approved() { let candidate_hash = CandidateHash(Hash::repeat_byte(0xCC)); let candidate_hash_2 = CandidateHash(Hash::repeat_byte(0xDD)); - let validator_index_a = 0; - let validator_index_b = 1; + let validator_index_a = ValidatorIndex(0); + let validator_index_b = ValidatorIndex(1); let mut state = State { assignment_criteria: Box::new(MockAssignmentCriteria::check_only(|| { @@ -1268,7 +1268,7 @@ fn block_not_approved_until_all_candidates_approved() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob, Sr25519Keyring::Charlie], - validator_groups: vec![vec![0, 1], vec![2]], + validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(1)], vec![ValidatorIndex(2)]], needed_approvals: 2, ..Default::default() }) @@ -1341,8 +1341,8 @@ fn candidate_approval_applied_to_all_blocks() { let block_hash = Hash::repeat_byte(0x01); let block_hash_2 = Hash::repeat_byte(0x02); let candidate_hash = CandidateHash(Hash::repeat_byte(0xCC)); - let validator_index_a = 0; - let validator_index_b = 1; + let validator_index_a = ValidatorIndex(0); + let validator_index_b = ValidatorIndex(1); let slot = Slot::from(1); let session_index = 1; @@ -1353,7 +1353,7 @@ fn candidate_approval_applied_to_all_blocks() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob, Sr25519Keyring::Charlie], - validator_groups: vec![vec![0, 1], vec![2]], + validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(1)], vec![ValidatorIndex(2)]], needed_approvals: 2, session_index, slot, @@ -1456,7 +1456,7 @@ fn approved_ancestor_all_approved() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob], - validator_groups: vec![vec![0], vec![1]], + validator_groups: vec![vec![ValidatorIndex(0)], vec![ValidatorIndex(1)]], needed_approvals: 2, session_index, slot, @@ -1538,7 +1538,7 @@ fn approved_ancestor_missing_approval() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob], - validator_groups: vec![vec![0], vec![1]], + validator_groups: vec![vec![ValidatorIndex(0)], vec![ValidatorIndex(1)]], needed_approvals: 2, session_index, slot, @@ -1615,7 +1615,7 @@ fn process_wakeup_trigger_assignment_launch_approval() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob], - validator_groups: vec![vec![0], vec![1]], + validator_groups: vec![vec![ValidatorIndex(0)], vec![ValidatorIndex(1)]], needed_approvals: 2, session_index, slot, @@ -1641,7 +1641,7 @@ fn process_wakeup_trigger_assignment_launch_approval() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: 0, - validator_index: 0, + validator_index: ValidatorIndex(0), triggered: false, }.into()); @@ -1700,7 +1700,7 @@ fn process_wakeup_schedules_wakeup() { })), ..some_state(StateConfig { validators: vec![Sr25519Keyring::Alice, Sr25519Keyring::Bob], - validator_groups: vec![vec![0], vec![1]], + validator_groups: vec![vec![ValidatorIndex(0)], vec![ValidatorIndex(1)]], needed_approvals: 2, session_index, slot, @@ -1718,7 +1718,7 @@ fn process_wakeup_schedules_wakeup() { AssignmentCertKind::RelayVRFModulo { sample: 0 } ), tranche: 10, - validator_index: 0, + validator_index: ValidatorIndex(0), triggered: false, 
}.into()); diff --git a/node/core/av-store/src/tests.rs b/node/core/av-store/src/tests.rs index 1d75e2b9beb9..c92e28ce3d8b 100644 --- a/node/core/av-store/src/tests.rs +++ b/node/core/av-store/src/tests.rs @@ -260,7 +260,7 @@ fn runtime_api_error_does_not_stop_the_subsystem() { // but that's fine, we're still alive let (tx, rx) = oneshot::channel(); let candidate_hash = CandidateHash(Hash::repeat_byte(33)); - let validator_index = 5; + let validator_index = ValidatorIndex(5); let query_chunk = AvailabilityStoreMessage::QueryChunk( candidate_hash, validator_index, @@ -281,7 +281,7 @@ fn store_chunk_works() { let TestHarness { mut virtual_overseer } = test_harness; let relay_parent = Hash::repeat_byte(32); let candidate_hash = CandidateHash(Hash::repeat_byte(33)); - let validator_index = 5; + let validator_index = ValidatorIndex(5); let n_validators = 10; let chunk = ErasureChunk { @@ -333,7 +333,7 @@ fn store_chunk_does_nothing_if_no_entry_already() { let TestHarness { mut virtual_overseer } = test_harness; let relay_parent = Hash::repeat_byte(32); let candidate_hash = CandidateHash(Hash::repeat_byte(33)); - let validator_index = 5; + let validator_index = ValidatorIndex(5); let chunk = ErasureChunk { chunk: vec![1, 2, 3], @@ -372,7 +372,7 @@ fn query_chunk_checks_meta() { test_harness(TestState::default(), store.clone(), |test_harness| async move { let TestHarness { mut virtual_overseer } = test_harness; let candidate_hash = CandidateHash(Hash::repeat_byte(33)); - let validator_index = 5; + let validator_index = ValidatorIndex(5); let n_validators = 10; // Ensure an entry already exists. In reality this would come from watching @@ -382,7 +382,7 @@ fn query_chunk_checks_meta() { data_available: false, chunks_stored: { let mut v = bitvec::bitvec![BitOrderLsb0, u8; 0; n_validators]; - v.set(validator_index as usize, true); + v.set(validator_index.0 as usize, true); v }, state: State::Unavailable(BETimestamp(0)), @@ -402,7 +402,7 @@ fn query_chunk_checks_meta() { let (tx, rx) = oneshot::channel(); let query_chunk = AvailabilityStoreMessage::QueryChunkAvailability( candidate_hash, - validator_index + 1, + ValidatorIndex(validator_index.0 + 1), tx, ); @@ -418,7 +418,7 @@ fn store_block_works() { test_harness(test_state.clone(), store.clone(), |test_harness| async move { let TestHarness { mut virtual_overseer } = test_harness; let candidate_hash = CandidateHash(Hash::repeat_byte(1)); - let validator_index = 5; + let validator_index = ValidatorIndex(5); let n_validators = 10; let pov = PoV { @@ -455,7 +455,7 @@ fn store_block_works() { let branch = branches.nth(5).unwrap(); let expected_chunk = ErasureChunk { chunk: branch.1.to_vec(), - index: 5, + index: ValidatorIndex(5), proof: branch.0, }; @@ -497,10 +497,10 @@ fn store_pov_and_query_chunk_works() { assert_eq!(rx.await.unwrap(), Ok(())); - for validator_index in 0..n_validators { - let chunk = query_chunk(&mut virtual_overseer, candidate_hash, validator_index).await.unwrap(); + for i in 0..n_validators { + let chunk = query_chunk(&mut virtual_overseer, candidate_hash, ValidatorIndex(i as _)).await.unwrap(); - assert_eq!(chunk.chunk, chunks_expected[validator_index as usize]); + assert_eq!(chunk.chunk, chunks_expected[i as usize]); } }); } @@ -842,7 +842,7 @@ async fn query_available_data( async fn query_chunk( virtual_overseer: &mut test_helpers::TestSubsystemContextHandle, candidate_hash: CandidateHash, - index: u32, + index: ValidatorIndex, ) -> Option { let (tx, rx) = oneshot::channel(); @@ -859,7 +859,7 @@ async fn query_all_chunks( 
expect_present: bool, ) -> bool { for i in 0..n_validators { - if query_chunk(virtual_overseer, candidate_hash, i).await.is_some() != expect_present { + if query_chunk(virtual_overseer, candidate_hash, ValidatorIndex(i)).await.is_some() != expect_present { return false } } diff --git a/node/core/backing/src/lib.rs b/node/core/backing/src/lib.rs index dc8279592894..e58a88c1aeed 100644 --- a/node/core/backing/src/lib.rs +++ b/node/core/backing/src/lib.rs @@ -1281,7 +1281,8 @@ mod tests { let validator_public = validator_pubkeys(&validators); - let validator_groups = vec![vec![2, 0, 3, 5], vec![1], vec![4]]; + let validator_groups = vec![vec![2, 0, 3, 5], vec![1], vec![4]] + .into_iter().map(|g| g.into_iter().map(ValidatorIndex).collect()).collect(); let group_rotation_info = GroupRotationInfo { session_start_block: 0, group_rotation_frequency: 100, @@ -1598,7 +1599,7 @@ mod tests { &test_state.keystore, Statement::Seconded(candidate_a.clone()), &test_state.signing_context, - 2, + ValidatorIndex(2), &public2.into(), ).await.expect("should be signed"); @@ -1606,7 +1607,7 @@ mod tests { &test_state.keystore, Statement::Valid(candidate_a_hash), &test_state.signing_context, - 5, + ValidatorIndex(5), &public1.into(), ).await.expect("should be signed"); @@ -1740,7 +1741,7 @@ mod tests { &test_state.keystore, Statement::Seconded(candidate_a.clone()), &test_state.signing_context, - 2, + ValidatorIndex(2), &public2.into(), ).await.expect("should be signed"); @@ -1748,7 +1749,7 @@ mod tests { &test_state.keystore, Statement::Valid(candidate_a_hash), &test_state.signing_context, - 5, + ValidatorIndex(5), &public1.into(), ).await.expect("should be signed"); @@ -1756,7 +1757,7 @@ mod tests { &test_state.keystore, Statement::Valid(candidate_a_hash), &test_state.signing_context, - 3, + ValidatorIndex(3), &public3.into(), ).await.expect("should be signed"); @@ -1893,7 +1894,7 @@ mod tests { &test_state.keystore, Statement::Seconded(candidate_a.clone()), &test_state.signing_context, - 2, + ValidatorIndex(2), &public2.into(), ).await.expect("should be signed"); @@ -1901,7 +1902,7 @@ mod tests { &test_state.keystore, Statement::Invalid(candidate_a_hash), &test_state.signing_context, - 2, + ValidatorIndex(2), &public2.into(), ).await.expect("should be signed"); @@ -1909,7 +1910,7 @@ mod tests { &test_state.keystore, Statement::Invalid(candidate_a_hash), &test_state.signing_context, - 0, + ValidatorIndex(0), &public0.into(), ).await.expect("should be signed"); @@ -2001,7 +2002,7 @@ mod tests { validator_index, s1, &test_state.signing_context, - &test_state.validator_public[validator_index as usize], + &test_state.validator_public[validator_index.0 as usize], ).expect("signature must be valid"); SignedFullStatement::new( @@ -2009,7 +2010,7 @@ mod tests { validator_index, s2, &test_state.signing_context, - &test_state.validator_public[validator_index as usize], + &test_state.validator_public[validator_index.0 as usize], ).expect("signature must be valid"); } ); @@ -2041,7 +2042,7 @@ mod tests { validator_index, s1, &test_state.signing_context, - &test_state.validator_public[validator_index as usize], + &test_state.validator_public[validator_index.0 as usize], ).expect("signature must be valid"); SignedFullStatement::new( @@ -2049,7 +2050,7 @@ mod tests { validator_index, s2, &test_state.signing_context, - &test_state.validator_public[validator_index as usize], + &test_state.validator_public[validator_index.0 as usize], ).expect("signature must be valid"); } ); @@ -2222,7 +2223,7 @@ mod tests { &test_state.keystore, 
Statement::Seconded(candidate.clone()), &test_state.signing_context, - 2, + ValidatorIndex(2), &validator2.into(), ).await.expect("should be signed"); @@ -2360,7 +2361,7 @@ mod tests { &test_state.keystore, Statement::Seconded(candidate.clone()), &test_state.signing_context, - 2, + ValidatorIndex(2), &public2.into(), ).await.expect("should be signed"); @@ -2502,7 +2503,7 @@ mod tests { &test_state.keystore, Statement::Seconded(candidate_a.clone()), &test_state.signing_context, - 2, + ValidatorIndex(2), &public2.into(), ).await.expect("should be signed"); @@ -2541,7 +2542,7 @@ mod tests { let validator_public = validator_pubkeys(&validators); let validator_groups = { let mut validator_groups = HashMap::new(); - validator_groups.insert(para_id, vec![0, 1, 2, 3, 4, 5]); + validator_groups.insert(para_id, vec![0, 1, 2, 3, 4, 5].into_iter().map(ValidatorIndex).collect()); validator_groups }; @@ -2566,9 +2567,9 @@ mod tests { let attested = TableAttestedCandidate { candidate: Default::default(), validity_votes: vec![ - (5, fake_attestation(5)), - (3, fake_attestation(3)), - (1, fake_attestation(1)), + (ValidatorIndex(5), fake_attestation(5)), + (ValidatorIndex(3), fake_attestation(3)), + (ValidatorIndex(1), fake_attestation(1)), ], group_id: para_id, }; diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index 5e0753749e2a..ed511e1e1113 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -208,7 +208,7 @@ fn try_import_the_same_assignment() { overseer_send(overseer, msg).await; // send the assignment related to `hash` - let validator_index = 0u32; + let validator_index = ValidatorIndex(0); let cert = fake_assignment_cert(hash, validator_index); let assignments = vec![(cert.clone(), 0u32)]; @@ -299,7 +299,7 @@ fn spam_attack_results_in_negative_reputation_change() { // to populate our knowledge let assignments: Vec<_> = (0..candidates_count) .map(|candidate_index| { - let validator_index = candidate_index as u32; + let validator_index = ValidatorIndex(candidate_index as u32); let cert = fake_assignment_cert(hash_b, validator_index); (cert, candidate_index as u32) }).collect(); @@ -372,7 +372,7 @@ fn import_approval_happy_path() { overseer_send(overseer, msg).await; // import an assignment related to `hash` locally - let validator_index = 0u32; + let validator_index = ValidatorIndex(0); let candidate_index = 0u32; let cert = fake_assignment_cert(hash, validator_index); overseer_send( @@ -455,7 +455,7 @@ fn import_approval_bad() { let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); overseer_send(overseer, msg).await; - let validator_index = 0u32; + let validator_index = ValidatorIndex(0); let candidate_index = 0u32; let cert = fake_assignment_cert(hash, validator_index); @@ -616,8 +616,8 @@ fn update_peer_view() { let msg = ApprovalDistributionMessage::NewBlocks(vec![meta_a, meta_b, meta_c]); overseer_send(overseer, msg).await; - let cert_a = fake_assignment_cert(hash_a, 0); - let cert_b = fake_assignment_cert(hash_b, 0); + let cert_a = fake_assignment_cert(hash_a, ValidatorIndex(0)); + let cert_b = fake_assignment_cert(hash_b, ValidatorIndex(0)); overseer_send( overseer, @@ -670,7 +670,7 @@ fn update_peer_view() { ) ).await; - let cert_c = fake_assignment_cert(hash_c, 0); + let cert_c = fake_assignment_cert(hash_c, ValidatorIndex(0)); overseer_send( overseer, @@ -753,7 +753,7 @@ fn import_remotely_then_locally() { overseer_send(overseer, msg).await; // import the 
assignment remotely first - let validator_index = 0u32; + let validator_index = ValidatorIndex(0); let candidate_index = 0u32; let cert = fake_assignment_cert(hash, validator_index); let assignments = vec![(cert.clone(), candidate_index)]; diff --git a/node/network/availability-recovery/src/tests.rs b/node/network/availability-recovery/src/tests.rs index 40b708d387f8..bbe63dc8a093 100644 --- a/node/network/availability-recovery/src/tests.rs +++ b/node/network/availability-recovery/src/tests.rs @@ -184,7 +184,7 @@ impl TestState { validators: self.validator_public.clone(), discovery_keys: self.validator_authority_id.clone(), // all validators in the same group. - validator_groups: vec![(0..self.validators.len()).map(|i| i as ValidatorIndex).collect()], + validator_groups: vec![(0..self.validators.len()).map(|i| ValidatorIndex(i as _)).collect()], ..Default::default() }))).unwrap(); } @@ -272,10 +272,10 @@ impl TestState { virtual_overseer, AvailabilityRecoveryMessage::NetworkBridgeUpdateV1( NetworkBridgeEvent::PeerMessage( - self.validator_peer_id[validator_index as usize].clone(), + self.validator_peer_id[validator_index.0 as usize].clone(), protocol_v1::AvailabilityRecoveryMessage::Chunk( request_id, - Some(self.chunks[validator_index as usize].clone()), + Some(self.chunks[validator_index.0 as usize].clone()), ) ) ) @@ -317,10 +317,10 @@ impl TestState { virtual_overseer, AvailabilityRecoveryMessage::NetworkBridgeUpdateV1( NetworkBridgeEvent::PeerMessage( - self.validator_peer_id[validator_index as usize].clone(), + self.validator_peer_id[validator_index.0 as usize].clone(), protocol_v1::AvailabilityRecoveryMessage::Chunk( request_id, - Some(self.chunks[validator_index as usize].clone()), + Some(self.chunks[validator_index.0 as usize].clone()), ) ) ) @@ -457,7 +457,7 @@ fn derive_erasure_chunks_with_proofs_and_root( .enumerate() .map(|(index, (proof, chunk))| ErasureChunk { chunk: chunk.to_vec(), - index: index as _, + index: ValidatorIndex(index as _), proof, }) .collect::>(); diff --git a/node/network/bitfield-distribution/src/lib.rs b/node/network/bitfield-distribution/src/lib.rs index e1f4df4a41d8..fac59333664d 100644 --- a/node/network/bitfield-distribution/src/lib.rs +++ b/node/network/bitfield-distribution/src/lib.rs @@ -767,7 +767,7 @@ mod test { use bitvec::bitvec; use futures::executor; use maplit::hashmap; - use polkadot_primitives::v1::{Signed, AvailabilityBitfield}; + use polkadot_primitives::v1::{Signed, AvailabilityBitfield, ValidatorIndex}; use polkadot_node_subsystem_test_helpers::make_subsystem_context; use polkadot_node_subsystem_util::TimeoutExt; use sp_keystore::{SyncCryptoStorePtr, SyncCryptoStore}; @@ -882,7 +882,7 @@ mod test { &keystore, payload, &signing_context, - 0, + ValidatorIndex(0), &malicious.into(), )).expect("should be signed"); @@ -947,7 +947,7 @@ mod test { &keystore, payload, &signing_context, - 42, + ValidatorIndex(42), &validator, )).expect("should be signed"); @@ -1004,7 +1004,7 @@ mod test { &keystore, payload, &signing_context, - 0, + ValidatorIndex(0), &validator, )).expect("should be signed"); @@ -1119,7 +1119,7 @@ mod test { &keystore, payload, &signing_context, - 0, + ValidatorIndex(0), &validator, )).expect("should be signed"); @@ -1215,7 +1215,7 @@ mod test { &keystore, payload, &signing_context, - 0, + ValidatorIndex(0), &validator, )).expect("should be signed"); @@ -1374,7 +1374,7 @@ mod test { &keystore, payload, &signing_context, - 0, + ValidatorIndex(0), &validator, )).expect("should be signed"); diff --git 
a/node/network/collator-protocol/src/collator_side.rs b/node/network/collator-protocol/src/collator_side.rs index eccab4be21a2..d31365f2784a 100644 --- a/node/network/collator-protocol/src/collator_side.rs +++ b/node/network/collator-protocol/src/collator_side.rs @@ -937,7 +937,8 @@ mod tests { .take(validator_public.len()) .collect(); - let validator_groups = vec![vec![2, 0, 4], vec![3, 2, 4]]; + let validator_groups = vec![vec![2, 0, 4], vec![3, 2, 4]] + .into_iter().map(|g| g.into_iter().map(ValidatorIndex).collect()).collect(); let group_rotation_info = GroupRotationInfo { session_start_block: 0, group_rotation_frequency: 100, @@ -979,20 +980,20 @@ mod tests { } fn current_group_validator_peer_ids(&self) -> Vec { - self.current_group_validator_indices().iter().map(|i| self.validator_peer_id[*i as usize].clone()).collect() + self.current_group_validator_indices().iter().map(|i| self.validator_peer_id[i.0 as usize].clone()).collect() } fn current_group_validator_authority_ids(&self) -> Vec { self.current_group_validator_indices() .iter() - .map(|i| self.validator_authority_id[*i as usize].clone()) + .map(|i| self.validator_authority_id[i.0 as usize].clone()) .collect() } fn current_group_validator_ids(&self) -> Vec { self.current_group_validator_indices() .iter() - .map(|i| self.validator_public[*i as usize].clone()) + .map(|i| self.validator_public[i.0 as usize].clone()) .collect() } @@ -1003,7 +1004,7 @@ mod tests { fn next_group_validator_authority_ids(&self) -> Vec { self.next_group_validator_indices() .iter() - .map(|i| self.validator_authority_id[*i as usize].clone()) + .map(|i| self.validator_authority_id[i.0 as usize].clone()) .collect() } diff --git a/node/network/pov-distribution/src/tests.rs b/node/network/pov-distribution/src/tests.rs index 2dfc0ce11f96..d8ceab55374c 100644 --- a/node/network/pov-distribution/src/tests.rs +++ b/node/network/pov-distribution/src/tests.rs @@ -174,7 +174,8 @@ impl Default for TestState { .take(validator_public.len()) .collect(); - let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]].into_iter().map(|g| g.map(ValidatorIndex)).collect(); + let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]] + .into_iter().map(|g| g.into_iter().map(ValidatorIndex).collect()).collect(); let group_rotation_info = GroupRotationInfo { session_start_block: 0, group_rotation_frequency: 100, @@ -737,7 +738,8 @@ fn we_inform_peers_with_same_view_we_are_awaiting() { .take(validators.len()) .collect(); - let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]].map(|g| g.map(ValidatorIndex)).collect(); + let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]] + .into_iter().map(|g| g.into_iter().map(ValidatorIndex).collect()).collect(); let group_rotation_info = GroupRotationInfo { session_start_block: 0, group_rotation_frequency: 100, diff --git a/runtime/parachains/src/inclusion.rs b/runtime/parachains/src/inclusion.rs index 1ed91b386817..d53f3240f6b6 100644 --- a/runtime/parachains/src/inclusion.rs +++ b/runtime/parachains/src/inclusion.rs @@ -1602,7 +1602,7 @@ mod tests { group_index if group_index == GroupIndex::from(1) => Some(vec![2, 3]), group_index if group_index == GroupIndex::from(2) => Some(vec![4]), _ => panic!("Group index out of bounds for 2 parachains and 1 parathread core"), - }.map(|m| m.map(ValidatorIndex)); + }.map(|m| m.into_iter().map(ValidatorIndex).collect::>()); let thread_collator: CollatorId = Sr25519Keyring::Two.public().into(); @@ -2089,7 +2089,7 @@ mod tests { group_index if group_index == GroupIndex::from(1) => 
Some(vec![2, 3]), group_index if group_index == GroupIndex::from(2) => Some(vec![4]), _ => panic!("Group index out of bounds for 2 parachains and 1 parathread core"), - }; + }.map(|vs| vs.into_iter().map(ValidatorIndex).collect::>()); let thread_collator: CollatorId = Sr25519Keyring::Two.public().into(); @@ -2284,7 +2284,7 @@ mod tests { let group_validators = |group_index: GroupIndex| match group_index { group_index if group_index == GroupIndex::from(0) => Some(vec![0, 1, 2, 3, 4]), _ => panic!("Group index out of bounds for 1 parachain"), - }; + }.map(|vs| vs.into_iter().map(ValidatorIndex).collect::>()); let chain_a_assignment = CoreAssignment { core: CoreIndex::from(0), @@ -2382,7 +2382,7 @@ mod tests { run_to_block(10, |_| None); >::insert( - &0, + &ValidatorIndex(0), AvailabilityBitfieldRecord { bitfield: default_bitfield(), submitted_at: 9, @@ -2390,7 +2390,7 @@ mod tests { ); >::insert( - &1, + &ValidatorIndex(1), AvailabilityBitfieldRecord { bitfield: default_bitfield(), submitted_at: 9, @@ -2398,7 +2398,7 @@ mod tests { ); >::insert( - &4, + &ValidatorIndex(4), AvailabilityBitfieldRecord { bitfield: default_bitfield(), submitted_at: 9, @@ -2435,9 +2435,9 @@ mod tests { assert_eq!(Validators::get(), validator_public); assert_eq!(shared::Module::::session_index(), 5); - assert!(>::get(&0).is_some()); - assert!(>::get(&1).is_some()); - assert!(>::get(&4).is_some()); + assert!(>::get(&ValidatorIndex(0)).is_some()); + assert!(>::get(&ValidatorIndex(1)).is_some()); + assert!(>::get(&ValidatorIndex(4)).is_some()); assert!(>::get(&chain_a).is_some()); assert!(>::get(&chain_b).is_some()); @@ -2459,9 +2459,9 @@ mod tests { assert_eq!(Validators::get(), validator_public_new); assert_eq!(shared::Module::::session_index(), 6); - assert!(>::get(&0).is_none()); - assert!(>::get(&1).is_none()); - assert!(>::get(&4).is_none()); + assert!(>::get(&ValidatorIndex(0)).is_none()); + assert!(>::get(&ValidatorIndex(1)).is_none()); + assert!(>::get(&ValidatorIndex(4)).is_none()); assert!(>::get(&chain_a).is_none()); assert!(>::get(&chain_b).is_none()); From 00e2f69058136bd96af7ec3bc013e4eec0c5c74d Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 22:10:41 +0100 Subject: [PATCH 33/60] Fix test. --- node/core/approval-voting/src/import.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/core/approval-voting/src/import.rs b/node/core/approval-voting/src/import.rs index 541c8ff4273d..78d0ecc863c5 100644 --- a/node/core/approval-voting/src/import.rs +++ b/node/core/approval-voting/src/import.rs @@ -1547,7 +1547,7 @@ mod tests { validators: vec![Sr25519Keyring::Alice.public().into(); 6], discovery_keys: Vec::new(), assignment_keys: Vec::new(), - validator_groups: vec![vec![ValidatorIndex(0), ValidatorIndex(5)], vec![ValidatorIndex(0), ValidatorIndex(2)]], + validator_groups: vec![vec![ValidatorIndex(0); 5], vec![ValidatorIndex(0); 2]], n_cores: 6, needed_approvals: 2, zeroth_delay_tranche_width: irrelevant, From c837d98ff7de9a287d12cabd909196cbe89119d0 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 19 Feb 2021 22:38:09 +0100 Subject: [PATCH 34/60] Cleanup + get rid of some warnings. 
--- node/network/availability-distribution/src/lib.rs | 9 ++++----- node/network/availability-distribution/src/requester.rs | 6 ++---- .../src/requester/fetch_task.rs | 4 ++-- node/network/availability-distribution/src/responder.rs | 2 +- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index ab68a899582b..4358511a62b8 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -42,15 +42,14 @@ mod session_cache; const LOG_TARGET: &'static str = "availability_distribution"; /// Availability Distribution metrics. -/// TODO: Dummy for now. type Metrics = (); /// The availability distribution subsystem. pub struct AvailabilityDistributionSubsystem { /// Pointer to a keystore, which is required for determining this nodes validator index. keystore: SyncCryptoStorePtr, - /// Prometheus metrics. - metrics: Metrics, + //// Prometheus metrics. + // metrics: Metrics, } impl Subsystem for AvailabilityDistributionSubsystem @@ -72,8 +71,8 @@ where impl AvailabilityDistributionSubsystem { /// Create a new instance of the availability distribution. - pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { - Self { keystore, metrics } + pub fn new(keystore: SyncCryptoStorePtr, _metrics: Metrics) -> Self { + Self { keystore } } /// Start processing work as passed on from the Overseer. diff --git a/node/network/availability-distribution/src/requester.rs b/node/network/availability-distribution/src/requester.rs index 799d149b2c10..8d033d474b46 100644 --- a/node/network/availability-distribution/src/requester.rs +++ b/node/network/availability-distribution/src/requester.rs @@ -57,8 +57,6 @@ pub struct Requester { fetches: HashMap, /// Localized information about sessions we are currently interested in. - /// - /// This is the current one and the last one. session_cache: SessionCache, /// Sender to be cloned for `FetchTask`s. @@ -73,7 +71,7 @@ impl Requester { /// /// You must feed it with `ActiveLeavesUpdate` via `update_fetching_heads` and make it progress /// by advancing the stream. - pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self { + pub fn new(keystore: SyncCryptoStorePtr) -> Self { // All we do is forwarding messages, no need to make this big. let (tx, rx) = mpsc::channel(1); Requester { @@ -86,7 +84,7 @@ impl Requester { /// Update heads that need availability distribution. /// /// For all active heads we will be fetching our chunks for availabilty distribution. - pub(crate) async fn update_fetching_heads( + pub async fn update_fetching_heads( &mut self, ctx: &mut Context, update: ActiveLeavesUpdate, diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index f19dc98ba743..329f5051f0b4 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -58,7 +58,7 @@ pub struct FetchTask { /// stop keeping track of that candidate/chunk. live_in: HashSet, - /// We keep the task around in state `Fetched` until `live_in` becomes empty, to make + /// We keep the task around in until `live_in` becomes empty, to make /// sure we won't re-fetch an already fetched candidate. state: FetchedState, } @@ -80,7 +80,7 @@ pub enum FromFetchTask { /// Concluded with result. 
/// - /// In case of `None` everything was fine, in case of `Some` some validators in the group + /// In case of `None` everything was fine, in case of `Some`, some validators in the group /// did not serve us our chunk as expected. Concluded(Option), } diff --git a/node/network/availability-distribution/src/responder.rs b/node/network/availability-distribution/src/responder.rs index 23ec112030df..8208ed39c057 100644 --- a/node/network/availability-distribution/src/responder.rs +++ b/node/network/availability-distribution/src/responder.rs @@ -14,7 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -//! Responder answers requests for availability chunks. +//! Answer requests for availability chunks. use futures::channel::oneshot; From 8945fbb8a2d86bcfe5a45fd951267d6f2bc5e81e Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 19:07:51 +0100 Subject: [PATCH 35/60] state -> requester --- node/network/availability-distribution/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 4358511a62b8..3226d9d972d5 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -80,13 +80,13 @@ impl AvailabilityDistributionSubsystem { where Context: SubsystemContext + Sync + Send, { - let mut state = Requester::new(self.keystore.clone()).fuse(); + let mut requester = Requester::new(self.keystore.clone()).fuse(); loop { let action = { let mut subsystem_next = ctx.recv().fuse(); futures::select! { subsystem_msg = subsystem_next => Either::Left(subsystem_msg), - from_task = state.next() => Either::Right(from_task), + from_task = requester.next() => Either::Right(from_task), } }; @@ -104,7 +104,7 @@ impl AvailabilityDistributionSubsystem { match message { FromOverseer::Signal(OverseerSignal::ActiveLeaves(update)) => { // Update the relay chain heads we are fetching our pieces for: - state + requester .get_mut() .update_fetching_heads(&mut ctx, update) .await?; From c9984fbd6638cc5cfd87077a5db7072e3baee40a Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 19:43:07 +0100 Subject: [PATCH 36/60] Mostly doc fixes. --- .../src/requester.rs | 8 +++-- .../src/requester/fetch_task.rs | 11 +++++-- .../src/responder.rs | 1 + .../src/session_cache.rs | 29 +++++++++++-------- 4 files changed, 31 insertions(+), 18 deletions(-) diff --git a/node/network/availability-distribution/src/requester.rs b/node/network/availability-distribution/src/requester.rs index 8d033d474b46..4218321b4938 100644 --- a/node/network/availability-distribution/src/requester.rs +++ b/node/network/availability-distribution/src/requester.rs @@ -123,7 +123,7 @@ impl Requester { /// Stop requesting chunks for obsolete heads. 
/// fn stop_requesting_chunks(&mut self, obsolete_leaves: impl Iterator) { - let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().collect(); + let obsolete_leaves: HashSet<_> = obsolete_leaves.collect(); self.fetches.retain(|_, task| { task.remove_leaves(&obsolete_leaves); task.is_live() @@ -155,6 +155,7 @@ impl Requester { } Entry::Vacant(e) => { let tx = self.tx.clone(); + let task_cfg = self .session_cache .with_session_info( @@ -163,6 +164,7 @@ impl Requester { |info| FetchTaskConfig::new(leaf, &core, tx, info), ) .await?; + if let Some(task_cfg) = task_cfg { e.insert(FetchTask::start(task_cfg, ctx).await?); } @@ -200,8 +202,8 @@ impl Stream for Requester { } } -///// Query all hashes and descriptors of candidates pending availability at a particular block. -// #[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] +/// Query all hashes and descriptors of candidates pending availability at a particular block. +#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] async fn query_occupied_cores( ctx: &mut Context, relay_parent: Hash, diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 329f5051f0b4..19315eab010e 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -50,6 +50,7 @@ pub struct FetchTaskConfig { live_in: HashSet, } +/// Information about a task fetching an erasure chunk. pub struct FetchTask { /// For what relay parents this task is relevant. /// @@ -90,7 +91,9 @@ struct RunningTask { /// For what session we have been spawned. session_index: SessionIndex, - /// Index of validator group. + /// Index of validator group to fetch the chunk from. + /// + /// Needef for reporting bad validators. group_index: GroupIndex, /// Validators to request the chunk from. @@ -134,7 +137,9 @@ impl FetchTaskConfig { let prepared_running = RunningTask { session_index: session_info.session_index, group_index: core.group_responsible, - group: session_info.validator_groups.get(core.group_responsible.0 as usize).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(), + group: session_info.validator_groups.get(core.group_responsible.0 as usize) + .expect("The responsible group of a candidate should be available in the corresponding session. qed.") + .clone(), request: AvailabilityFetchingRequest { candidate_hash: core.candidate_hash, index: session_info.our_index, @@ -184,7 +189,7 @@ impl FetchTask { /// Add the given leaf to the relay parents which are making this task relevant. /// - /// This is for book keeping, so we know we are already fetching a chunk. + /// This is for book keeping, so we know we are already fetching a given chunk. pub fn add_leaf(&mut self, leaf: Hash) { self.live_in.insert(leaf); } diff --git a/node/network/availability-distribution/src/responder.rs b/node/network/availability-distribution/src/responder.rs index 8208ed39c057..1d6e886edf80 100644 --- a/node/network/availability-distribution/src/responder.rs +++ b/node/network/availability-distribution/src/responder.rs @@ -46,6 +46,7 @@ where req.send_response(response).map_err(|_| Error::SendResponse) } +/// Query chunk from the availability store. 
#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))] async fn query_chunk( ctx: &mut Context, diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 874671fbefcb..d10d59b7cc01 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -40,7 +40,6 @@ use super::{ /// Caching of session info as needed by availability distribution. /// /// It should be ensured that a cached session stays live in the cache as long as we might need it. -/// A warning will be logged, if an already dead entry gets fetched. pub struct SessionCache { /// Get the session index for a given relay parent. /// @@ -52,7 +51,8 @@ pub struct SessionCache { /// /// Note: Performance of fetching is really secondary here, but we need to ensure we are going /// to get any existing cache entry, before fetching new information, as we should not mess up - /// the order of validators. (We want live TCP connections wherever possible.) + /// the order of validators in `SessionInfo::validator_groups`. (We want live TCP connections + /// wherever possible.) session_info_cache: LruCache, /// Key store for determining whether we are a validator and what `ValidatorIndex` we have. @@ -64,32 +64,38 @@ pub struct SessionCache { pub struct SessionInfo { /// The index of this session. pub session_index: SessionIndex, + /// Validator groups of the current session. /// /// Each group's order is randomized. This way we achieve load balancing when requesting /// chunks, as the validators in a group will be tried in that randomized order. Each node - /// should arrive at a different order, therefore we distribute the load. + /// should arrive at a different order, therefore we distribute the load on individual + /// validators. pub validator_groups: Vec>, /// Information about ourself: pub our_index: ValidatorIndex, - /// Remember to which group we belong, so we won't start fetching chunks for candidates those - /// candidates (We should have them via PoV distribution). + /// Remember to which group we belong, so we won't start fetching chunks for candidates with + /// our group being responsible. (We should have that chunk already.) pub our_group: GroupIndex, } /// Report of bad validators. +/// +/// Fetching tasks will report back validators that did not respond as expected, so we can re-order +/// them. pub struct BadValidators { /// The session index that was used. pub session_index: SessionIndex, - /// The group the not properly responding validators are. + /// The group, the not properly responding validators belong to. pub group_index: GroupIndex, - /// The indeces of the bad validators. + /// The list of bad validators. pub bad_validators: Vec, } impl SessionCache { + /// Create a new `SessionCache`. pub fn new(keystore: SyncCryptoStorePtr) -> Self { SessionCache { // 5 relatively conservative, 1 to 2 should suffice: @@ -104,7 +110,7 @@ impl SessionCache { /// /// If this node is not a validator, the function will return `None`. /// - /// Use this function over `fetch_session_info` if all you need is a reference to + /// Use this function over any `fetch_session_info` if all you need is a reference to /// `SessionInfo`, as it avoids an expensive clone. pub async fn with_session_info( &mut self, @@ -170,7 +176,7 @@ impl SessionCache { /// Query needed information from runtime. 
/// /// We need to pass in the relay parent for our call to `request_session_info_ctx`. We should - /// actually don't need that, I suppose it is used for internal caching based on relay parents, + /// actually don't need that: I suppose it is used for internal caching based on relay parents, /// which we don't use here. It should not do any harm though. async fn query_info_from_runtime( &self, @@ -204,7 +210,6 @@ impl SessionCache { } }) }) - // TODO: Make sure this is correct and should be enforced: .expect("Every validator should be in a validator group. qed."); // Shuffle validators in groups: @@ -237,9 +242,9 @@ impl SessionCache { return Ok(None); } - /// Get our validator id and the validators in the current session. + /// Get our `ValidatorIndex`. /// - /// Returns: Ok(None) if we are not a validator. + /// Returns: None if we are not a validator. async fn get_our_index(&self, validators: Vec) -> Option { for (i, v) in validators.iter().enumerate() { if CryptoStore::has_keys(&*self.keystore, &[(v.to_raw_vec(), ValidatorId::ID)]) From 770775949db41c9a4cb0ff8884c070b07b76f7bc Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 20:06:35 +0100 Subject: [PATCH 37/60] Fix test suite. --- node/network/availability-distribution/src/requester.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/network/availability-distribution/src/requester.rs b/node/network/availability-distribution/src/requester.rs index 4218321b4938..f6fe101b500b 100644 --- a/node/network/availability-distribution/src/requester.rs +++ b/node/network/availability-distribution/src/requester.rs @@ -39,7 +39,7 @@ use polkadot_subsystem::{ messages::AllMessages, ActiveLeavesUpdate, jaeger, SubsystemContext, }; -use super::{error::recv_runtime, session_cache::SessionCache, Result}; +use super::{error::recv_runtime, session_cache::SessionCache, Result, LOG_TARGET}; /// A task fetching a particular chunk. mod fetch_task; From e7623d4df62d356db88c303dc2631aad08b6adf2 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 21:04:02 +0100 Subject: [PATCH 38/60] Get rid of now redundant message types. --- .../availability-distribution/src/lib.rs | 5 ----- node/network/bridge/src/lib.rs | 17 +++-------------- node/network/protocol/src/lib.rs | 12 ------------ node/subsystem/src/messages.rs | 3 --- 4 files changed, 3 insertions(+), 34 deletions(-) diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 3226d9d972d5..9912c4c85242 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -118,11 +118,6 @@ impl AvailabilityDistributionSubsystem { } => { answer_request(&mut ctx, req).await? } - FromOverseer::Communication { - msg: AvailabilityDistributionMessage::NetworkBridgeUpdateV1(_), - } => { - // There are currently no bridge updates we are interested in. 
- } } } } diff --git a/node/network/bridge/src/lib.rs b/node/network/bridge/src/lib.rs index 720cea102168..309b560bb7e2 100644 --- a/node/network/bridge/src/lib.rs +++ b/node/network/bridge/src/lib.rs @@ -28,8 +28,8 @@ use polkadot_subsystem::{ SubsystemResult, jaeger, }; use polkadot_subsystem::messages::{ - NetworkBridgeMessage, AllMessages, AvailabilityDistributionMessage, - BitfieldDistributionMessage, PoVDistributionMessage, StatementDistributionMessage, + NetworkBridgeMessage, AllMessages, BitfieldDistributionMessage, + PoVDistributionMessage, StatementDistributionMessage, CollatorProtocolMessage, ApprovalDistributionMessage, NetworkBridgeEvent, }; use polkadot_primitives::v1::{Hash, BlockNumber}; @@ -567,10 +567,6 @@ async fn dispatch_validation_events_to_all( I::IntoIter: Send, { let messages_for = |event: NetworkBridgeEvent| { - let a = std::iter::once(event.focus().ok().map(|m| AllMessages::AvailabilityDistribution( - AvailabilityDistributionMessage::NetworkBridgeUpdateV1(m) - ))); - let b = std::iter::once(event.focus().ok().map(|m| AllMessages::BitfieldDistribution( BitfieldDistributionMessage::NetworkBridgeUpdateV1(m) ))); @@ -587,7 +583,7 @@ async fn dispatch_validation_events_to_all( ApprovalDistributionMessage::NetworkBridgeUpdateV1(m) ))); - a.chain(b).chain(p).chain(s).chain(ap).filter_map(|x| x) + b.chain(p).chain(s).chain(ap).filter_map(|x| x) }; ctx.send_messages(events.into_iter().flat_map(messages_for)).await @@ -817,13 +813,6 @@ mod tests { event: NetworkBridgeEvent, virtual_overseer: &mut TestSubsystemContextHandle, ) { - assert_matches!( - virtual_overseer.recv().await, - AllMessages::AvailabilityDistribution( - AvailabilityDistributionMessage::NetworkBridgeUpdateV1(e) - ) if e == event.focus().expect("could not focus message") - ); - assert_matches!( virtual_overseer.recv().await, AllMessages::BitfieldDistribution( diff --git a/node/network/protocol/src/lib.rs b/node/network/protocol/src/lib.rs index 2f7547f2de46..3d5d8351bb68 100644 --- a/node/network/protocol/src/lib.rs +++ b/node/network/protocol/src/lib.rs @@ -252,14 +252,6 @@ pub mod v1 { use super::RequestId; use std::convert::TryFrom; - /// Network messages used by the availability distribution subsystem - #[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)] - pub enum AvailabilityDistributionMessage { - /// An erasure chunk for a given candidate hash. - #[codec(index = 0)] - Chunk(CandidateHash, ErasureChunk), - } - /// Network messages used by the availability recovery subsystem. #[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)] pub enum AvailabilityRecoveryMessage { @@ -408,9 +400,6 @@ pub mod v1 { /// All network messages on the validation peer-set. 
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)] pub enum ValidationProtocol { - /// Availability distribution messages - #[codec(index = 0)] - AvailabilityDistribution(AvailabilityDistributionMessage), /// Bitfield distribution messages #[codec(index = 1)] BitfieldDistribution(BitfieldDistributionMessage), @@ -428,7 +417,6 @@ pub mod v1 { ApprovalDistribution(ApprovalDistributionMessage), } - impl_try_from!(ValidationProtocol, AvailabilityDistribution, AvailabilityDistributionMessage); impl_try_from!(ValidationProtocol, BitfieldDistribution, BitfieldDistributionMessage); impl_try_from!(ValidationProtocol, PoVDistribution, PoVDistributionMessage); impl_try_from!(ValidationProtocol, StatementDistribution, StatementDistributionMessage); diff --git a/node/subsystem/src/messages.rs b/node/subsystem/src/messages.rs index 857b1e90ae96..629b25df881e 100644 --- a/node/subsystem/src/messages.rs +++ b/node/subsystem/src/messages.rs @@ -269,8 +269,6 @@ impl NetworkBridgeMessage { /// Availability Distribution Message. #[derive(Debug, derive_more::From)] pub enum AvailabilityDistributionMessage { - /// Event from the network bridge. - NetworkBridgeUpdateV1(NetworkBridgeEvent), /// Incoming request for an availability chunk. AvailabilityFetchingRequest(IncomingRequest) } @@ -293,7 +291,6 @@ impl AvailabilityDistributionMessage { /// If the current variant contains the relay parent hash, return it. pub fn relay_parent(&self) -> Option { match self { - Self::NetworkBridgeUpdateV1(_) => None, Self::AvailabilityFetchingRequest(_) => None, } } From e8d7e44cd3c95412d34018593ac038522d8037b8 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 22:31:24 +0100 Subject: [PATCH 39/60] WIP --- node/network/availability-distribution/src/lib.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 9912c4c85242..d8389d4933d2 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -39,17 +39,18 @@ use responder::answer_request; /// Cache for session information. mod session_cache; -const LOG_TARGET: &'static str = "availability_distribution"; +mod metrics; +/// Prometheus `Metrics` for availability distribution. +pub use metrics::Metrics; -/// Availability Distribution metrics. -type Metrics = (); +const LOG_TARGET: &'static str = "availability_distribution"; /// The availability distribution subsystem. pub struct AvailabilityDistributionSubsystem { /// Pointer to a keystore, which is required for determining this nodes validator index. keystore: SyncCryptoStorePtr, - //// Prometheus metrics. - // metrics: Metrics, + /// Prometheus metrics. + metrics: Metrics, } impl Subsystem for AvailabilityDistributionSubsystem @@ -71,8 +72,8 @@ where impl AvailabilityDistributionSubsystem { /// Create a new instance of the availability distribution. - pub fn new(keystore: SyncCryptoStorePtr, _metrics: Metrics) -> Self { - Self { keystore } + pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { + Self { keystore, metrics } } /// Start processing work as passed on from the Overseer. From 5fb84180e0f2a2db1f4ed27c94683d3609fb02c0 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 22:18:46 +0100 Subject: [PATCH 40/60] Rob's review remarks. 
--- .../availability-distribution/src/requester/fetch_task.rs | 2 +- node/network/availability-distribution/src/session_cache.rs | 3 ++- node/network/protocol/src/request_response/v1.rs | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 19315eab010e..05f70ec5a8cb 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -262,7 +262,7 @@ impl RunningTask { }; let chunk = match resp { AvailabilityFetchingResponse::Chunk(resp) => { - resp.reconstruct_erasure_chunk(&self.request) + resp.recombine_into_chunk(&self.request) } AvailabilityFetchingResponse::NoSuchChunk => { tracing::debug!(target: LOG_TARGET, validator = ?validator, "Validator did not have our chunk"); diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index d10d59b7cc01..672ada0896bc 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -225,7 +225,8 @@ impl SessionCache { .into_iter() .map(|index| { discovery_keys.get(index.0 as usize) - .expect("There should be a discovery key for each validator of each validator group. qed.").clone() + .expect("There should be a discovery key for each validator of each validator group. qed.") + .clone() }) .collect() }) diff --git a/node/network/protocol/src/request_response/v1.rs b/node/network/protocol/src/request_response/v1.rs index 22724c1f44b7..4f8c968b8fd5 100644 --- a/node/network/protocol/src/request_response/v1.rs +++ b/node/network/protocol/src/request_response/v1.rs @@ -59,13 +59,13 @@ pub struct ChunkResponse { impl From for ChunkResponse { fn from(ErasureChunk {chunk, index: _, proof}: ErasureChunk) -> Self { - ChunkResponse { chunk, proof} + ChunkResponse {chunk, proof} } } impl ChunkResponse { /// Re-build an `ErasureChunk` from response and request. - pub fn reconstruct_erasure_chunk(self, req: &AvailabilityFetchingRequest) -> ErasureChunk { + pub fn recombine_into_chunk(self, req: &AvailabilityFetchingRequest) -> ErasureChunk { ErasureChunk { chunk: self.chunk, proof: self.proof, From 9780f3a3c8329a73bb8a585ebdf3fb16cd9e270f Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 23:13:50 +0100 Subject: [PATCH 41/60] Fix test suite. 
--- node/overseer/src/lib.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/node/overseer/src/lib.rs b/node/overseer/src/lib.rs index cd9f1d4f1d89..da0ebca25ed6 100644 --- a/node/overseer/src/lib.rs +++ b/node/overseer/src/lib.rs @@ -2717,10 +2717,6 @@ mod tests { StatementDistributionMessage::NetworkBridgeUpdateV1(test_network_bridge_event()) } - fn test_availability_distribution_msg() -> AvailabilityDistributionMessage { - AvailabilityDistributionMessage::NetworkBridgeUpdateV1(test_network_bridge_event()) - } - fn test_availability_recovery_msg() -> AvailabilityRecoveryMessage { let (sender, _) = oneshot::channel(); AvailabilityRecoveryMessage::RecoverAvailableData( @@ -2828,7 +2824,6 @@ mod tests { handler.send_msg(AllMessages::CollationGeneration(test_collator_generation_msg())).await; handler.send_msg(AllMessages::CollatorProtocol(test_collator_protocol_msg())).await; handler.send_msg(AllMessages::StatementDistribution(test_statement_distribution_msg())).await; - handler.send_msg(AllMessages::AvailabilityDistribution(test_availability_distribution_msg())).await; handler.send_msg(AllMessages::AvailabilityRecovery(test_availability_recovery_msg())).await; // handler.send_msg(AllMessages::BitfieldSigning(test_bitfield_signing_msg())).await; handler.send_msg(AllMessages::BitfieldDistribution(test_bitfield_distribution_msg())).await; @@ -2851,8 +2846,8 @@ mod tests { assert_eq!(stop_signals_received.load(atomic::Ordering::SeqCst), NUM_SUBSYSTEMS); // x2 because of broadcast_signal on startup assert_eq!(signals_received.load(atomic::Ordering::SeqCst), NUM_SUBSYSTEMS); - // -1 for BitfieldSigning - assert_eq!(msgs_received.load(atomic::Ordering::SeqCst), NUM_SUBSYSTEMS - 1); + // -2 for BitfieldSigning and Availability distribution + assert_eq!(msgs_received.load(atomic::Ordering::SeqCst), NUM_SUBSYSTEMS - 2); assert!(res.is_ok()); }, From 5bbcea45cd0a5401e2f91395797f585005605c05 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 23:13:58 +0100 Subject: [PATCH 42/60] core.relay_parent -> leaf for session request. --- node/network/availability-distribution/src/requester.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/node/network/availability-distribution/src/requester.rs b/node/network/availability-distribution/src/requester.rs index f6fe101b500b..1a080853333c 100644 --- a/node/network/availability-distribution/src/requester.rs +++ b/node/network/availability-distribution/src/requester.rs @@ -160,7 +160,10 @@ impl Requester { .session_cache .with_session_info( ctx, - core.candidate_descriptor.relay_parent, + // We use leaf here, as relay_parent must be in the same session as the + // leaf. (Cores are dropped at session boundaries.) At the same time, + // only leaves are guaranteed to be fetchable by the state trie. + leaf, |info| FetchTaskConfig::new(leaf, &core, tx, info), ) .await?; From b792a89ed343cc4276d28ed39a7c04fe1d9b9ef2 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 23:21:20 +0100 Subject: [PATCH 43/60] Style fix. 
--- .../src/requester/fetch_task.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 05f70ec5a8cb..6b2612096a76 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -335,11 +335,11 @@ impl RunningTask { Ok(hash) => hash, Err(e) => { tracing::trace!( - target: LOG_TARGET, - candidate_hash = ?self.request.candidate_hash, - origin = ?validator, - error = ?e, - "Failed to calculate chunk merkle proof", + target: LOG_TARGET, + candidate_hash = ?self.request.candidate_hash, + origin = ?validator, + error = ?e, + "Failed to calculate chunk merkle proof", ); return false; } From 75e6af8728be9b0f8287557114b2f2f3d2a78ad5 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Feb 2021 23:26:59 +0100 Subject: [PATCH 44/60] Decrease request timeout. --- node/network/protocol/src/request_response.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/node/network/protocol/src/request_response.rs b/node/network/protocol/src/request_response.rs index 7d30fe76358f..2160d1905cab 100644 --- a/node/network/protocol/src/request_response.rs +++ b/node/network/protocol/src/request_response.rs @@ -60,7 +60,11 @@ pub enum Protocol { } /// Default request timeout in seconds. -const DEFAULT_REQUEST_TIMEOUT: u64 = 8; +/// +/// When decreasing this value, take into account that the very first request might need to open a +/// connection, which can be slow. If this causes problems, we should ensure connectivity via peer +/// sets. +const DEFAULT_REQUEST_TIMEOUT: u64 = 3; impl Protocol { /// Get a configuration for a given Request response protocol. From 53fdeb34d067d6ab824e1331348369798f798160 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 18:56:54 +0100 Subject: [PATCH 45/60] Cleanup obsolete errors. --- node/network/availability-distribution/src/error.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index f66d418d8e35..354e9c255e32 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -28,15 +28,9 @@ use polkadot_subsystem::{errors::RuntimeApiError, SubsystemError}; /// Errors of this subsystem. #[derive(Debug, Error)] pub enum Error { - #[error("Response channel to obtain StoreChunk failed")] - StoreChunkResponseChannel(#[source] oneshot::Canceled), - #[error("Response channel to obtain QueryChunk failed")] QueryChunkResponseChannel(#[source] oneshot::Canceled), - #[error("Response channel to obtain AvailabilityCores failed")] - QueryAvailabilityResponseChannel(#[source] oneshot::Canceled), - #[error("Receive channel closed")] IncomingMessageChannel(#[source] SubsystemError), From ce21a10bb1479f35b145ac35eaf30477399d0e48 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 19:00:30 +0100 Subject: [PATCH 46/60] Metrics + don't fail on non fatal errors. 
--- .../availability-distribution/src/lib.rs | 8 +- .../availability-distribution/src/metrics.rs | 112 ++++++++++++++++++ .../src/requester.rs | 35 +++--- .../src/requester/fetch_task.rs | 23 +++- .../src/responder.rs | 37 +++++- .../src/session_cache.rs | 15 +++ 6 files changed, 207 insertions(+), 23 deletions(-) create mode 100644 node/network/availability-distribution/src/metrics.rs diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index d8389d4933d2..4e8683a0920e 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -34,7 +34,7 @@ use requester::Requester; /// Responding to erasure chunk requests: mod responder; -use responder::answer_request; +use responder::answer_request_log; /// Cache for session information. mod session_cache; @@ -81,7 +81,7 @@ impl AvailabilityDistributionSubsystem { where Context: SubsystemContext + Sync + Send, { - let mut requester = Requester::new(self.keystore.clone()).fuse(); + let mut requester = Requester::new(self.keystore.clone(), self.metrics.clone()).fuse(); loop { let action = { let mut subsystem_next = ctx.recv().fuse(); @@ -97,7 +97,7 @@ impl AvailabilityDistributionSubsystem { subsystem_msg.map_err(|e| Error::IncomingMessageChannel(e))? } Either::Right(from_task) => { - let from_task = from_task.ok_or(Error::RequesterExhausted)??; + let from_task = from_task.ok_or(Error::RequesterExhausted)?; ctx.send_message(from_task).await; continue; } @@ -117,7 +117,7 @@ impl AvailabilityDistributionSubsystem { FromOverseer::Communication { msg: AvailabilityDistributionMessage::AvailabilityFetchingRequest(req), } => { - answer_request(&mut ctx, req).await? + answer_request_log(&mut ctx, req, &self.metrics).await } } } diff --git a/node/network/availability-distribution/src/metrics.rs b/node/network/availability-distribution/src/metrics.rs new file mode 100644 index 000000000000..1e44028a1ef9 --- /dev/null +++ b/node/network/availability-distribution/src/metrics.rs @@ -0,0 +1,112 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use polkadot_node_subsystem_util::metrics::prometheus::{Counter, U64, Registry, PrometheusError, CounterVec, Opts}; +use polkadot_node_subsystem_util::metrics::prometheus; +use polkadot_node_subsystem_util::metrics; + +/// Label for success counters. +pub const SUCCEEDED: &'static str = "succeeded"; + +/// Label for fail counters. +pub const FAILED: &'static str = "failed"; + +/// Label for chunks that could not be served, because they were not available. +pub const NOT_FOUND: &'static str = "not-found"; + +/// Availability Distribution metrics. +#[derive(Clone, Default)] +pub struct Metrics(Option); + + +#[derive(Clone)] +struct MetricsInner { + /// Number of chunks fetched. 
+ /// + /// Note: The failed count gets incremented, when we were not able to fetch the chunk at all. + /// For times, where we failed downloading, but succeeded on the next try (with different + /// backers), see `retries`. + fetched_chunks: CounterVec, + + /// Number of chunks served. + /// + /// Note: Right now, `Succeeded` gets incremented whenever we were able to successfully respond + /// to a chunk request. This includes `NoSuchChunk` responses. + served_chunks: CounterVec, + + /// Number of times our first set of validators did not provide the needed chunk and we had to + /// query further validators. + retries: Counter, +} + +impl Metrics { + /// Increment counter on fetched labels. + pub fn on_fetch(&self, label: &'static str) { + if let Some(metrics) = &self.0 { + metrics.fetched_chunks.with_label_values(&[label]).inc() + } + } + + /// Increment counter on served chunks. + pub fn on_served(&self, label: &'static str) { + if let Some(metrics) = &self.0 { + metrics.served_chunks.with_label_values(&[label]).inc() + } + } + + /// Increment retry counter. + pub fn on_retry(&self) { + if let Some(metrics) = &self.0 { + metrics.retries.inc() + } + } +} + +impl metrics::Metrics for Metrics { + fn try_register(registry: &Registry) -> Result { + let metrics = MetricsInner { + fetched_chunks: prometheus::register( + CounterVec::new( + Opts::new( + "Number of fetched chunks", + "Total number of fetched chunks.", + ), + &[FAILED, SUCCEEDED] + )?, + registry, + )?, + served_chunks: prometheus::register( + CounterVec::new( + Opts::new( + "Number of served chunks", + "Total number of chunks served by this backer.", + ), + &[FAILED, SUCCEEDED, NOT_FOUND] + )?, + registry, + )?, + retries: prometheus::register( + Counter::new( + "Number of retries", + "Number of times we did not succeed in fetching a chunk and needed to try more backers.", + )?, + registry, + )?, + }; + Ok(Metrics(Some(metrics))) + } +} + diff --git a/node/network/availability-distribution/src/requester.rs b/node/network/availability-distribution/src/requester.rs index 1a080853333c..a9b95b813cde 100644 --- a/node/network/availability-distribution/src/requester.rs +++ b/node/network/availability-distribution/src/requester.rs @@ -39,7 +39,7 @@ use polkadot_subsystem::{ messages::AllMessages, ActiveLeavesUpdate, jaeger, SubsystemContext, }; -use super::{error::recv_runtime, session_cache::SessionCache, Result, LOG_TARGET}; +use super::{error::recv_runtime, session_cache::SessionCache, Result, LOG_TARGET, Metrics}; /// A task fetching a particular chunk. mod fetch_task; @@ -64,6 +64,9 @@ pub struct Requester { /// Receive messages from `FetchTask`. rx: mpsc::Receiver, + + /// Prometheus Metrics + metrics: Metrics, } impl Requester { @@ -71,7 +74,7 @@ impl Requester { /// /// You must feed it with `ActiveLeavesUpdate` via `update_fetching_heads` and make it progress /// by advancing the stream. - pub fn new(keystore: SyncCryptoStorePtr) -> Self { + pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { // All we do is forwarding messages, no need to make this big. let (tx, rx) = mpsc::channel(1); Requester { @@ -79,6 +82,7 @@ impl Requester { session_cache: SessionCache::new(keystore), tx, rx, + metrics, } } /// Update heads that need availability distribution. @@ -155,6 +159,7 @@ impl Requester { } Entry::Vacant(e) => { let tx = self.tx.clone(); + let metrics = self.metrics.clone(); let task_cfg = self .session_cache @@ -164,7 +169,7 @@ impl Requester { // leaf. (Cores are dropped at session boundaries.) 
At the same time, // only leaves are guaranteed to be fetchable by the state trie. leaf, - |info| FetchTaskConfig::new(leaf, &core, tx, info), + |info| FetchTaskConfig::new(leaf, &core, tx, metrics, info), ) .await?; @@ -180,26 +185,26 @@ impl Requester { } impl Stream for Requester { - type Item = Result; + type Item = AllMessages; fn poll_next( mut self: Pin<&mut Self>, ctx: &mut Context, - ) -> Poll>> { + ) -> Poll> { loop { match Pin::new(&mut self.rx).poll_next(ctx) { - Poll::Ready(Some(FromFetchTask::Message(m))) => { - return Poll::Ready(Some(Ok(m))) - } + Poll::Ready(Some(FromFetchTask::Message(m))) => + return Poll::Ready(Some(m)), Poll::Ready(Some(FromFetchTask::Concluded(Some(bad_boys)))) => { - match self.session_cache.report_bad(bad_boys) { - Err(err) => return Poll::Ready(Some(Err(err))), - Ok(()) => continue, - } + self.session_cache.report_bad_log(bad_boys); + continue } - Poll::Ready(Some(FromFetchTask::Concluded(None))) => continue, - Poll::Ready(None) => return Poll::Ready(None), - Poll::Pending => return Poll::Pending, + Poll::Ready(Some(FromFetchTask::Concluded(None))) => + continue, + Poll::Ready(None) => + return Poll::Ready(None), + Poll::Pending => + return Poll::Pending, } } } diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task.rs index 6b2612096a76..10d0821a23d0 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task.rs @@ -39,6 +39,7 @@ use crate::{ error::{Error, Result}, session_cache::{BadValidators, SessionInfo}, LOG_TARGET, + metrics::{Metrics, SUCCEEDED, FAILED}, }; /// Configuration for a `FetchTask` @@ -112,6 +113,9 @@ struct RunningTask { /// Sender for communicating with other subsystems and reporting results. sender: mpsc::Sender, + + /// Prometheues metrics for reporting results. + metrics: Metrics, } impl FetchTaskConfig { @@ -122,6 +126,7 @@ impl FetchTaskConfig { leaf: Hash, core: &OccupiedCore, sender: mpsc::Sender, + metrics: Metrics, session_info: &SessionInfo, ) -> Self { let live_in = vec![leaf].into_iter().collect(); @@ -146,6 +151,7 @@ impl FetchTaskConfig { }, erasure_root: core.candidate_descriptor.erasure_root, relay_parent: core.candidate_descriptor.relay_parent, + metrics, sender, }; FetchTaskConfig { @@ -243,8 +249,16 @@ impl RunningTask { /// Try validators in backing group in order. 
async fn run_inner(mut self) { let mut bad_validators = Vec::new(); + let mut label = FAILED; + let mut count: u32 = 0; // Try validators in reverse order: while let Some(validator) = self.group.pop() { + // Report retries: + if count > 0 { + self.metrics.on_retry(); + } + count +=1; + // Send request: let resp = match self.do_request(&validator).await { Ok(resp) => resp, @@ -253,6 +267,7 @@ impl RunningTask { target: LOG_TARGET, "Node seems to be shutting down, canceling fetch task" ); + self.metrics.on_fetch(FAILED); return } Err(TaskError::PeerError) => { @@ -265,7 +280,11 @@ impl RunningTask { resp.recombine_into_chunk(&self.request) } AvailabilityFetchingResponse::NoSuchChunk => { - tracing::debug!(target: LOG_TARGET, validator = ?validator, "Validator did not have our chunk"); + tracing::debug!( + target: LOG_TARGET, + validator = ?validator, + "Validator did not have our chunk" + ); bad_validators.push(validator); continue } @@ -279,8 +298,10 @@ impl RunningTask { // Ok, let's store it and be happy: self.store_chunk(chunk).await; + label = SUCCEEDED; break; } + self.metrics.on_fetch(label); self.conclude(bad_validators).await; } diff --git a/node/network/availability-distribution/src/responder.rs b/node/network/availability-distribution/src/responder.rs index 1d6e886edf80..c094b17fd666 100644 --- a/node/network/availability-distribution/src/responder.rs +++ b/node/network/availability-distribution/src/responder.rs @@ -26,24 +26,55 @@ use polkadot_subsystem::{ }; use crate::error::{Error, Result}; -use crate::LOG_TARGET; +use crate::{LOG_TARGET, metrics::{Metrics, SUCCEEDED, FAILED, NOT_FOUND}}; + +/// Variant of `answer_request` that does Prometheus metric and logging on errors. +/// +/// Any errors of `answer_request` will simply be logged. +pub async fn answer_request_log( + ctx: &mut Context, + req: IncomingRequest, + metrics: &Metrics, +) -> () +where + Context: SubsystemContext, +{ + let res = answer_request(ctx, req).await; + match res { + Ok(result) => + metrics.on_served(if result {SUCCEEDED} else {NOT_FOUND}), + Err(err) => { + tracing::warn!( + target: LOG_TARGET, + err= ?err, + "Serving chunk failed with error" + ); + metrics.on_served(FAILED); + } + } +} /// Answer an incoming chunk request by querying the av store. +/// +/// Returns: Ok(true) if chunk was found and served. pub async fn answer_request( ctx: &mut Context, req: IncomingRequest, -) -> Result<()> +) -> Result where Context: SubsystemContext, { let chunk = query_chunk(ctx, req.payload.candidate_hash, req.payload.index).await?; + let result = chunk.is_some(); + let response = match chunk { None => v1::AvailabilityFetchingResponse::NoSuchChunk, Some(chunk) => v1::AvailabilityFetchingResponse::Chunk(chunk.into()), }; - req.send_response(response).map_err(|_| Error::SendResponse) + req.send_response(response).map_err(|_| Error::SendResponse)?; + Ok(result) } /// Query chunk from the availability store. diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index 672ada0896bc..d3081f35268a 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -35,6 +35,7 @@ use polkadot_subsystem::SubsystemContext; use super::{ error::{recv_runtime, Result}, Error, + LOG_TARGET, }; /// Caching of session info as needed by availability distribution. 
@@ -148,6 +149,20 @@ impl SessionCache { Ok(None) } + /// Variant of `report_bad` that never fails, but just logs errors. + /// + /// Not being able to report bad validators is not fatal, so we should not shutdown the + /// subsystem on this. + pub fn report_bad_log(&mut self, report: BadValidators) { + if let Err(err) = self.report_bad(report) { + tracing::warn!( + target: LOG_TARGET, + err= ?err, + "Reporting bad validators failed with error" + ); + } + } + /// Make sure we try unresponsive or misbehaving validators last. /// /// We assume validators in a group are tried in reverse order, so the reported bad validators From 64d72469b827a59f548275724996229b3186190f Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 19:03:07 +0100 Subject: [PATCH 47/60] requester.rs -> requester/mod.rs --- .../src/{requester.rs => requester/mod.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename node/network/availability-distribution/src/{requester.rs => requester/mod.rs} (100%) diff --git a/node/network/availability-distribution/src/requester.rs b/node/network/availability-distribution/src/requester/mod.rs similarity index 100% rename from node/network/availability-distribution/src/requester.rs rename to node/network/availability-distribution/src/requester/mod.rs From 2a9650f58f1233a86e5a4d6344484bdfa5a907b1 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 19:35:04 +0100 Subject: [PATCH 48/60] Panic on invalid BadValidator report. --- node/network/availability-distribution/src/error.rs | 6 +++--- node/network/availability-distribution/src/session_cache.rs | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index 354e9c255e32..dbe3ad56db16 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -54,9 +54,9 @@ pub enum Error { #[error("Spawning subsystem task failed")] SpawnTask(#[source] SubsystemError), - /// Reporting bad validators failed. - #[error("Reporting bad validators failed")] - ReportBadValidators(&'static str), + /// We tried accessing a session that was not cached. + #[error("Session is not cached.")] + NoSuchCachedSession, /// Requester stream exhausted. #[error("Erasure chunk requester stream exhausted")] diff --git a/node/network/availability-distribution/src/session_cache.rs b/node/network/availability-distribution/src/session_cache.rs index d3081f35268a..395d2ae78384 100644 --- a/node/network/availability-distribution/src/session_cache.rs +++ b/node/network/availability-distribution/src/session_cache.rs @@ -167,15 +167,16 @@ impl SessionCache { /// /// We assume validators in a group are tried in reverse order, so the reported bad validators /// will be put at the beginning of the group. + #[tracing::instrument(level = "trace", skip(self, report), fields(subsystem = LOG_TARGET))] pub fn report_bad(&mut self, report: BadValidators) -> Result<()> { let session = self .session_info_cache .get_mut(&report.session_index) - .ok_or(Error::ReportBadValidators("Session is not cached."))?; + .ok_or(Error::NoSuchCachedSession)?; let group = session .validator_groups .get_mut(report.group_index.0 as usize) - .ok_or(Error::ReportBadValidators("Validator group not found"))?; + .expect("A bad validator report must contain a valid group for the reported session. 
qed."); let bad_set = report.bad_validators.iter().collect::>(); // Get rid of bad boys: From 4d05d008adfd71dff9255d7b5b832134368eac77 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 19:38:56 +0100 Subject: [PATCH 49/60] Fix indentation. --- node/network/availability-distribution/src/metrics.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/node/network/availability-distribution/src/metrics.rs b/node/network/availability-distribution/src/metrics.rs index 1e44028a1ef9..a70efd76071a 100644 --- a/node/network/availability-distribution/src/metrics.rs +++ b/node/network/availability-distribution/src/metrics.rs @@ -34,18 +34,18 @@ pub struct Metrics(Option); #[derive(Clone)] struct MetricsInner { - /// Number of chunks fetched. + /// Number of chunks fetched. /// /// Note: The failed count gets incremented, when we were not able to fetch the chunk at all. /// For times, where we failed downloading, but succeeded on the next try (with different /// backers), see `retries`. - fetched_chunks: CounterVec, + fetched_chunks: CounterVec, - /// Number of chunks served. + /// Number of chunks served. /// /// Note: Right now, `Succeeded` gets incremented whenever we were able to successfully respond /// to a chunk request. This includes `NoSuchChunk` responses. - served_chunks: CounterVec, + served_chunks: CounterVec, /// Number of times our first set of validators did not provide the needed chunk and we had to /// query further validators. From aadc80f9c1bca5e1f312e409a3502b06ddf94cdb Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 19:41:10 +0100 Subject: [PATCH 50/60] Use typed default timeout constant. --- node/network/protocol/src/request_response.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node/network/protocol/src/request_response.rs b/node/network/protocol/src/request_response.rs index 2160d1905cab..75eb33cfabb0 100644 --- a/node/network/protocol/src/request_response.rs +++ b/node/network/protocol/src/request_response.rs @@ -64,7 +64,7 @@ pub enum Protocol { /// When decreasing this value, take into account that the very first request might need to open a /// connection, which can be slow. If this causes problems, we should ensure connectivity via peer /// sets. -const DEFAULT_REQUEST_TIMEOUT: u64 = 3; +const DEFAULT_REQUEST_TIMEOUT: Duration = Duration::from_secs(3); impl Protocol { /// Get a configuration for a given Request response protocol. @@ -90,7 +90,7 @@ impl Protocol { max_request_size: 10_000, max_response_size: 1_000_000, // Also just some relative conservative guess: - request_timeout: Duration::from_secs(DEFAULT_REQUEST_TIMEOUT), + request_timeout: DEFAULT_REQUEST_TIMEOUT, inbound_queue: Some(tx), }, }; From e45f61c3e8f8d30f500a7ee0dd6fa7dbc6600993 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 20:06:52 +0100 Subject: [PATCH 51/60] Make channel size 0, as each sender gets one slot anyways. --- node/network/availability-distribution/src/requester/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/node/network/availability-distribution/src/requester/mod.rs b/node/network/availability-distribution/src/requester/mod.rs index a9b95b813cde..914a86ef7def 100644 --- a/node/network/availability-distribution/src/requester/mod.rs +++ b/node/network/availability-distribution/src/requester/mod.rs @@ -76,7 +76,9 @@ impl Requester { /// by advancing the stream. 
pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { // All we do is forwarding messages, no need to make this big. - let (tx, rx) = mpsc::channel(1); + // Each sender will get one slot, see + // [here](https://docs.rs/futures/0.3.13/futures/channel/mpsc/fn.channel.html). + let (tx, rx) = mpsc::channel(0); Requester { fetches: HashMap::new(), session_cache: SessionCache::new(keystore), From 43dfd1ccbc1632879e8cfbd736f3f7589e4169b6 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 21:41:01 +0100 Subject: [PATCH 52/60] Fix incorrect metrics initialization. --- node/network/availability-distribution/src/metrics.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node/network/availability-distribution/src/metrics.rs b/node/network/availability-distribution/src/metrics.rs index a70efd76071a..0ece5acb38ee 100644 --- a/node/network/availability-distribution/src/metrics.rs +++ b/node/network/availability-distribution/src/metrics.rs @@ -84,7 +84,7 @@ impl metrics::Metrics for Metrics { "Number of fetched chunks", "Total number of fetched chunks.", ), - &[FAILED, SUCCEEDED] + &["success"] )?, registry, )?, @@ -94,7 +94,7 @@ impl metrics::Metrics for Metrics { "Number of served chunks", "Total number of chunks served by this backer.", ), - &[FAILED, SUCCEEDED, NOT_FOUND] + &["success"] )?, registry, )?, From 53531576eaadbbe9364d4f81c7979e774f6fbe5f Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 21:49:12 +0100 Subject: [PATCH 53/60] Fix build after merge. --- runtime/parachains/src/inclusion.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/parachains/src/inclusion.rs b/runtime/parachains/src/inclusion.rs index aaafe0fbd939..364705a2c3b1 100644 --- a/runtime/parachains/src/inclusion.rs +++ b/runtime/parachains/src/inclusion.rs @@ -1256,7 +1256,7 @@ mod tests { let signed = block_on(sign_bitfield( &keystore, &validators[0], - 0, + ValidatorIndex(0), bare_bitfield, &signing_context, )); From ff944440e47ee1148c9860fdff2686f3f62c8ecf Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Feb 2021 22:59:31 +0100 Subject: [PATCH 54/60] More fixes. 
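
A note on the `mpsc::channel(0)` change in the requester above: in `futures::channel::mpsc` the effective capacity is `buffer + number_of_senders`, so every `Sender` clone keeps one guaranteed slot and a buffer of zero still lets each fetch task queue a single message. A minimal sketch of that behaviour, assuming only the `futures` 0.3 crate:

    use futures::channel::mpsc;

    fn main() {
        // Capacity is `buffer + number_of_senders`: with a buffer of zero our
        // single sender still owns one guaranteed slot.
        let (mut tx, _rx) = mpsc::channel::<u32>(0);

        // The guaranteed slot lets one message through immediately ...
        assert!(tx.try_send(1).is_ok());
        // ... while a second one is rejected until the receiver makes room.
        assert!(tx.try_send(2).unwrap_err().is_full());
    }

With one guaranteed slot per sender, an extra buffer slot only costs memory without changing behaviour, which is what the commit message above points out.
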
--- node/core/approval-voting/src/lib.rs | 2 +- node/network/approval-distribution/src/lib.rs | 2 +- node/network/availability-recovery/src/lib.rs | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/node/core/approval-voting/src/lib.rs b/node/core/approval-voting/src/lib.rs index 00e8ce46fce9..ddc46a4d1c60 100644 --- a/node/core/approval-voting/src/lib.rs +++ b/node/core/approval-voting/src/lib.rs @@ -846,7 +846,7 @@ fn check_and_import_assignment( tracing::trace!( target: LOG_TARGET, "Imported assignment from validator {} on candidate {:?}", - assignment.validator, + assignment.validator.0, (assigned_candidate_hash, candidate_entry.candidate_receipt().descriptor.para_id), ); diff --git a/node/network/approval-distribution/src/lib.rs b/node/network/approval-distribution/src/lib.rs index 42de2eceeab6..4d98d58ba79c 100644 --- a/node/network/approval-distribution/src/lib.rs +++ b/node/network/approval-distribution/src/lib.rs @@ -164,7 +164,7 @@ impl State { self.handle_peer_view_change(ctx, peer_id, view).await; } NetworkBridgeEvent::OurViewChange(view) => { - for head in &view.heads { + for head in view.iter() { if !self.blocks.contains_key(head) { self.pending_known.entry(*head).or_default(); } diff --git a/node/network/availability-recovery/src/lib.rs b/node/network/availability-recovery/src/lib.rs index ab192d492542..a18fe1eda96d 100644 --- a/node/network/availability-recovery/src/lib.rs +++ b/node/network/availability-recovery/src/lib.rs @@ -852,7 +852,7 @@ async fn handle_network_update( chunk.is_some(), request_id, candidate_hash, - validator_index, + validator_index.0, ); // Whatever the result, issue an @@ -882,7 +882,7 @@ async fn handle_network_update( chunk.is_some(), request_id, awaited_chunk.candidate_hash, - awaited_chunk.validator_index, + awaited_chunk.validator_index.0, ); // If there exists an entry under r_id, remove it. @@ -1003,7 +1003,7 @@ async fn issue_request( request_id, peer_id, awaited_chunk.candidate_hash, - awaited_chunk.validator_index, + awaited_chunk.validator_index.0, ); protocol_v1::AvailabilityRecoveryMessage::RequestChunk( @@ -1019,7 +1019,7 @@ async fn issue_request( request_id, peer_id, awaited_data.candidate_hash, - awaited_data.validator_index, + awaited_data.validator_index.0, ); protocol_v1::AvailabilityRecoveryMessage::RequestFullData( From 6b71e549b9d228afdbdc4f8298f7ae8de6fcd8d3 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Wed, 24 Feb 2021 09:47:16 +0100 Subject: [PATCH 55/60] Hopefully valid metrics names. 
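
Strings like "Number of fetched chunks" are not legal Prometheus metric names (names must match `[a-zA-Z_:][a-zA-Z0-9_:]*`), so counters built with the old names are rejected by the client library; the follow-up commit additionally adopts the usual `parachain_` prefix and `_total` suffix for counters. A small sketch of the distinction, assuming the plain `prometheus` crate (the API re-exported by `polkadot_node_subsystem_util::metrics::prometheus`); the names and the exact point of failure here are illustrative:

    use prometheus::{CounterVec, IntCounter, Opts, Registry};

    fn main() {
        let registry = Registry::new();

        // A human-readable string with spaces is not a legal metric name;
        // building and registering such a counter fails.
        let bad = IntCounter::with_opts(Opts::new("Number of retries", "help"))
            .and_then(|c| registry.register(Box::new(c)));
        assert!(bad.is_err());

        // Conventional name: subsystem prefix plus `_total` suffix for a counter.
        let retries = IntCounter::with_opts(Opts::new(
            "parachain_fetch_retries_total",
            "Number of times fetching a chunk needed more backers.",
        ))
        .expect("valid metric name");
        registry
            .register(Box::new(retries.clone()))
            .expect("metric registers");
        retries.inc();
        assert_eq!(retries.get(), 1);

        // `success` is the label *name*; `succeeded`, `failed` and `not-found`
        // are values supplied when incrementing.
        let fetched = CounterVec::new(
            Opts::new("parachain_fetched_chunks_total", "Total number of fetched chunks."),
            &["success"],
        )
        .expect("valid metric name and label");
        registry
            .register(Box::new(fetched.clone()))
            .expect("metric registers");
        fetched.with_label_values(&["succeeded"]).inc();
    }

Label values, unlike metric and label names, are free-form strings, which is why a hyphenated value such as `not-found` is fine.
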
--- node/network/availability-distribution/src/metrics.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/node/network/availability-distribution/src/metrics.rs b/node/network/availability-distribution/src/metrics.rs index 0ece5acb38ee..bdd6b94ae8b5 100644 --- a/node/network/availability-distribution/src/metrics.rs +++ b/node/network/availability-distribution/src/metrics.rs @@ -81,7 +81,7 @@ impl metrics::Metrics for Metrics { fetched_chunks: prometheus::register( CounterVec::new( Opts::new( - "Number of fetched chunks", + "fetched_chunks", "Total number of fetched chunks.", ), &["success"] @@ -91,7 +91,7 @@ impl metrics::Metrics for Metrics { served_chunks: prometheus::register( CounterVec::new( Opts::new( - "Number of served chunks", + "served_chunks", "Total number of chunks served by this backer.", ), &["success"] @@ -100,7 +100,7 @@ impl metrics::Metrics for Metrics { )?, retries: prometheus::register( Counter::new( - "Number of retries", + "fetch_retries", "Number of times we did not succeed in fetching a chunk and needed to try more backers.", )?, registry, From 190adaad94ce3267c380510b3000b863ccd79d39 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 25 Feb 2021 11:04:48 +0100 Subject: [PATCH 56/60] Better metrics names. --- node/network/availability-distribution/src/metrics.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/node/network/availability-distribution/src/metrics.rs b/node/network/availability-distribution/src/metrics.rs index bdd6b94ae8b5..b7c41b04b83f 100644 --- a/node/network/availability-distribution/src/metrics.rs +++ b/node/network/availability-distribution/src/metrics.rs @@ -81,7 +81,7 @@ impl metrics::Metrics for Metrics { fetched_chunks: prometheus::register( CounterVec::new( Opts::new( - "fetched_chunks", + "parachain_fetched_chunks_total", "Total number of fetched chunks.", ), &["success"] @@ -91,7 +91,7 @@ impl metrics::Metrics for Metrics { served_chunks: prometheus::register( CounterVec::new( Opts::new( - "served_chunks", + "parachain_served_chunks_total", "Total number of chunks served by this backer.", ), &["success"] @@ -100,7 +100,7 @@ impl metrics::Metrics for Metrics { )?, retries: prometheus::register( Counter::new( - "fetch_retries", + "parachain_fetch_retries_total", "Number of times we did not succeed in fetching a chunk and needed to try more backers.", )?, registry, From 8901344d6f1f2fc83f33ad5293d3601830b4b96c Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 25 Feb 2021 23:18:50 +0100 Subject: [PATCH 57/60] Some tests that already work. 
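
The first test below, `task_can_be_canceled`, drives the future by hand: it pins the task, polls it exactly once with a waker that never wakes anything, and asserts that it is already `Ready`. A standalone sketch of that single-poll pattern, assuming only the `futures` 0.3 crate:

    use futures::task::{noop_waker, Context, Poll};
    use futures::{future, Future};

    fn main() {
        // A future with nothing left to do resolves on the very first poll,
        // even though the no-op waker could never reschedule it.
        let fut = future::ready(42u32);
        futures::pin_mut!(fut);

        let waker = noop_waker();
        let mut cx = Context::from_waker(&waker);
        assert_eq!(fut.poll(&mut cx), Poll::Ready(42));
    }
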
--- .../{fetch_task.rs => fetch_task/mod.rs} | 3 + .../src/requester/fetch_task/tests.rs | 166 ++++++++++++++++++ 2 files changed, 169 insertions(+) rename node/network/availability-distribution/src/requester/{fetch_task.rs => fetch_task/mod.rs} (99%) create mode 100644 node/network/availability-distribution/src/requester/fetch_task/tests.rs diff --git a/node/network/availability-distribution/src/requester/fetch_task.rs b/node/network/availability-distribution/src/requester/fetch_task/mod.rs similarity index 99% rename from node/network/availability-distribution/src/requester/fetch_task.rs rename to node/network/availability-distribution/src/requester/fetch_task/mod.rs index 10d0821a23d0..28d92ef8d2f0 100644 --- a/node/network/availability-distribution/src/requester/fetch_task.rs +++ b/node/network/availability-distribution/src/requester/fetch_task/mod.rs @@ -42,6 +42,9 @@ use crate::{ metrics::{Metrics, SUCCEEDED, FAILED}, }; +#[cfg(test)] +mod tests; + /// Configuration for a `FetchTask` /// /// This exists to separate preparation of a `FetchTask` from actual starting it, which is diff --git a/node/network/availability-distribution/src/requester/fetch_task/tests.rs b/node/network/availability-distribution/src/requester/fetch_task/tests.rs new file mode 100644 index 000000000000..510c2b04e8ef --- /dev/null +++ b/node/network/availability-distribution/src/requester/fetch_task/tests.rs @@ -0,0 +1,166 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use std::collections::HashMap; + +use parity_scale_codec::Encode; + +use futures::channel::{mpsc, oneshot}; +use futures::{executor, Future, FutureExt, StreamExt, select}; +use futures::task::{Poll, Context, noop_waker}; + +use sc_network as network; +use sp_keyring::Sr25519Keyring; + +use polkadot_primitives::v1::{CandidateHash, ValidatorIndex}; +use polkadot_node_network_protocol::request_response::v1; +use polkadot_subsystem::messages::AllMessages; + +use crate::metrics::Metrics; +use super::*; + + +#[test] +fn task_can_be_canceled() { + let (task, _rx) = get_test_running_task(); + let (handle, kill) = oneshot::channel(); + std::mem::drop(handle); + let running_task = task.run(kill); + futures::pin_mut!(running_task); + let waker = noop_waker(); + let mut ctx = Context::from_waker(&waker); + assert!(running_task.poll(&mut ctx) == Poll::Ready(()), "Task is immediately finished"); +} + +/// Make sure task won't accept a chunk that has is invalid. 
+#[test] +fn task_does_not_accept_invalid_chunk() { + let (mut task, rx) = get_test_running_task(); + let validators = vec![Sr25519Keyring::Alice.public().into()]; + task.group = validators; + let test = TestRun { + chunk_responses: { + let mut m = HashMap::new(); + m.insert( + Sr25519Keyring::Alice.public().into(), + v1::ChunkResponse { + chunk: vec![1,2,3], + proof: vec![vec![9,8,2], vec![2,3,4]], + } + ); + m + }, + valid_chunks: HashSet::new(), + }; + test.run(task, rx); +} + +struct TestRun { + /// Response to deliver for a given validator index. + /// None means, answer with NetworkError. + chunk_responses: HashMap, + /// Set of chunks that should be considered valid: + valid_chunks: HashSet>, +} + + +impl TestRun { + fn run(self, task: RunningTask, rx: mpsc::Receiver) { + let mut rx = rx.fuse(); + let task = task.run_inner().fuse(); + futures::pin_mut!(task); + executor::block_on(async { + let mut end_ok = false; + loop { + let msg = select!( + from_task = rx.next() => { + match from_task { + Some(msg) => msg, + None => break, + } + }, + () = task => + break, + ); + match msg { + FromFetchTask::Concluded(_) => break, + FromFetchTask::Message(msg) => + end_ok = self.handle_message(msg).await, + } + } + if !end_ok { + panic!("Task ended prematurely (failed to store valid chunk)!"); + } + }); + } + + /// Returns true, if after processing of the given message it would be ok for the stream to + /// end. + async fn handle_message(&self, msg: AllMessages) -> bool { + match msg { + AllMessages::NetworkBridge(NetworkBridgeMessage::SendRequests(reqs)) => { + let mut valid_responses = 0; + for req in reqs { + let req = match req { + Requests::AvailabilityFetching(req) => req, + }; + let response = self.chunk_responses.get(&req.peer) + .ok_or(network::RequestFailure::Refused); + + if let Ok(resp) = &response { + if self.valid_chunks.contains(&resp.chunk) { + valid_responses += 1; + } + } + req.pending_response.send(response.map(Encode::encode)) + .expect("Sending response should succeed"); + } + return (valid_responses == 0) && self.valid_chunks.is_empty() + } + AllMessages::AvailabilityStore( + AvailabilityStoreMessage::StoreChunk { chunk, .. } + ) => { + assert!(self.valid_chunks.contains(&chunk.chunk)); + return true + } + _ => { + tracing::debug!(target: LOG_TARGET, "Unexpected message"); + return false + } + } + } +} + +fn get_test_running_task() -> (RunningTask, mpsc::Receiver) { + let (tx,rx) = mpsc::channel(0); + + ( + RunningTask { + session_index: 0, + group_index: GroupIndex(0), + group: Vec::new(), + request: AvailabilityFetchingRequest { + candidate_hash: CandidateHash([43u8;32].into()), + index: ValidatorIndex(0), + }, + erasure_root: Hash::repeat_byte(99), + relay_parent: Hash::repeat_byte(71), + sender: tx, + metrics: Metrics::new_dummy(), + }, + rx + ) +} From 1d29b5cc5997905866272d8080b39fb598c0e23f Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 25 Feb 2021 23:19:21 +0100 Subject: [PATCH 58/60] Slightly better docs. --- node/subsystem/src/messages.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/subsystem/src/messages.rs b/node/subsystem/src/messages.rs index 629b25df881e..85ebb2099327 100644 --- a/node/subsystem/src/messages.rs +++ b/node/subsystem/src/messages.rs @@ -269,7 +269,7 @@ impl NetworkBridgeMessage { /// Availability Distribution Message. #[derive(Debug, derive_more::From)] pub enum AvailabilityDistributionMessage { - /// Incoming request for an availability chunk. + /// Incoming network request for an availability chunk. 
AvailabilityFetchingRequest(IncomingRequest) } From 83ff6668396f5b9458ef4ef8a9a61a171768b4c4 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 26 Feb 2021 14:19:12 +0100 Subject: [PATCH 59/60] Some more tests. --- .../availability-distribution/src/metrics.rs | 5 + .../src/requester/fetch_task/mod.rs | 2 +- .../src/requester/fetch_task/tests.rs | 167 +++++++++++++++++- 3 files changed, 164 insertions(+), 10 deletions(-) diff --git a/node/network/availability-distribution/src/metrics.rs b/node/network/availability-distribution/src/metrics.rs index b7c41b04b83f..c07500996fa2 100644 --- a/node/network/availability-distribution/src/metrics.rs +++ b/node/network/availability-distribution/src/metrics.rs @@ -53,6 +53,11 @@ struct MetricsInner { } impl Metrics { + /// Create new dummy metrics, not reporting anything. + pub fn new_dummy() -> Self { + Metrics(None) + } + /// Increment counter on fetched labels. pub fn on_fetch(&self, label: &'static str) { if let Some(metrics) = &self.0 { diff --git a/node/network/availability-distribution/src/requester/fetch_task/mod.rs b/node/network/availability-distribution/src/requester/fetch_task/mod.rs index 28d92ef8d2f0..3e187f9502e8 100644 --- a/node/network/availability-distribution/src/requester/fetch_task/mod.rs +++ b/node/network/availability-distribution/src/requester/fetch_task/mod.rs @@ -358,7 +358,7 @@ impl RunningTask { match branch_hash(&self.erasure_root, &chunk.proof, chunk.index.0 as usize) { Ok(hash) => hash, Err(e) => { - tracing::trace!( + tracing::warn!( target: LOG_TARGET, candidate_hash = ?self.request.candidate_hash, origin = ?validator, diff --git a/node/network/availability-distribution/src/requester/fetch_task/tests.rs b/node/network/availability-distribution/src/requester/fetch_task/tests.rs index 510c2b04e8ef..b4254850563c 100644 --- a/node/network/availability-distribution/src/requester/fetch_task/tests.rs +++ b/node/network/availability-distribution/src/requester/fetch_task/tests.rs @@ -15,6 +15,7 @@ // along with Polkadot. If not, see . 
use std::collections::HashMap; +use std::sync::Arc; use parity_scale_codec::Encode; @@ -22,17 +23,17 @@ use futures::channel::{mpsc, oneshot}; use futures::{executor, Future, FutureExt, StreamExt, select}; use futures::task::{Poll, Context, noop_waker}; +use polkadot_erasure_coding::{obtain_chunks_v1 as obtain_chunks, branches}; use sc_network as network; use sp_keyring::Sr25519Keyring; -use polkadot_primitives::v1::{CandidateHash, ValidatorIndex}; +use polkadot_primitives::v1::{AvailableData, BlockData, CandidateHash, HeadData, PersistedValidationData, PoV, ValidatorIndex}; use polkadot_node_network_protocol::request_response::v1; use polkadot_subsystem::messages::AllMessages; use crate::metrics::Metrics; use super::*; - #[test] fn task_can_be_canceled() { let (task, _rx) = get_test_running_task(); @@ -56,10 +57,12 @@ fn task_does_not_accept_invalid_chunk() { let mut m = HashMap::new(); m.insert( Sr25519Keyring::Alice.public().into(), - v1::ChunkResponse { - chunk: vec![1,2,3], - proof: vec![vec![9,8,2], vec![2,3,4]], - } + AvailabilityFetchingResponse::Chunk( + v1::ChunkResponse { + chunk: vec![1,2,3], + proof: vec![vec![9,8,2], vec![2,3,4]], + } + ) ); m }, @@ -68,10 +71,126 @@ fn task_does_not_accept_invalid_chunk() { test.run(task, rx); } +#[test] +fn task_stores_valid_chunk() { + let (mut task, rx) = get_test_running_task(); + let (root_hash, chunk) = get_valid_chunk_data(); + task.erasure_root = root_hash; + task.request.index = chunk.index; + + let validators = vec![Sr25519Keyring::Alice.public().into()]; + task.group = validators; + + let test = TestRun { + chunk_responses: { + let mut m = HashMap::new(); + m.insert( + Sr25519Keyring::Alice.public().into(), + AvailabilityFetchingResponse::Chunk( + v1::ChunkResponse { + chunk: chunk.chunk.clone(), + proof: chunk.proof, + } + ) + ); + m + }, + valid_chunks: { + let mut s = HashSet::new(); + s.insert(chunk.chunk); + s + }, + }; + test.run(task, rx); +} + +#[test] +fn task_does_not_accept_wrongly_indexed_chunk() { + let (mut task, rx) = get_test_running_task(); + let (root_hash, chunk) = get_valid_chunk_data(); + task.erasure_root = root_hash; + task.request.index = ValidatorIndex(chunk.index.0+1); + + let validators = vec![Sr25519Keyring::Alice.public().into()]; + task.group = validators; + + let test = TestRun { + chunk_responses: { + let mut m = HashMap::new(); + m.insert( + Sr25519Keyring::Alice.public().into(), + AvailabilityFetchingResponse::Chunk( + v1::ChunkResponse { + chunk: chunk.chunk.clone(), + proof: chunk.proof, + } + ) + ); + m + }, + valid_chunks: HashSet::new(), + }; + test.run(task, rx); +} + +/// Task stores chunk, if there is at least one validator having a valid chunk. +#[test] +fn task_stores_valid_chunk_if_there_is_one() { + let (mut task, rx) = get_test_running_task(); + let (root_hash, chunk) = get_valid_chunk_data(); + task.erasure_root = root_hash; + task.request.index = chunk.index; + + let validators = [ + // Only Alice has valid chunk - should succeed, even though she is tried last. 
+ Sr25519Keyring::Alice, + Sr25519Keyring::Bob, Sr25519Keyring::Charlie, + Sr25519Keyring::Dave, Sr25519Keyring::Eve, + ] + .iter().map(|v| v.public().into()).collect::>(); + task.group = validators; + + let test = TestRun { + chunk_responses: { + let mut m = HashMap::new(); + m.insert( + Sr25519Keyring::Alice.public().into(), + AvailabilityFetchingResponse::Chunk( + v1::ChunkResponse { + chunk: chunk.chunk.clone(), + proof: chunk.proof, + } + ) + ); + m.insert( + Sr25519Keyring::Bob.public().into(), + AvailabilityFetchingResponse::NoSuchChunk + ); + m.insert( + Sr25519Keyring::Charlie.public().into(), + AvailabilityFetchingResponse::Chunk( + v1::ChunkResponse { + chunk: vec![1,2,3], + proof: vec![vec![9,8,2], vec![2,3,4]], + } + ) + ); + + m + }, + valid_chunks: { + let mut s = HashSet::new(); + s.insert(chunk.chunk); + s + }, + }; + test.run(task, rx); +} + struct TestRun { /// Response to deliver for a given validator index. /// None means, answer with NetworkError. - chunk_responses: HashMap, + chunk_responses: HashMap, /// Set of chunks that should be considered valid: valid_chunks: HashSet>, } @@ -79,6 +198,7 @@ struct TestRun { impl TestRun { fn run(self, task: RunningTask, rx: mpsc::Receiver) { + sp_tracing::try_init_simple(); let mut rx = rx.fuse(); let task = task.run_inner().fuse(); futures::pin_mut!(task); @@ -120,7 +240,7 @@ impl TestRun { let response = self.chunk_responses.get(&req.peer) .ok_or(network::RequestFailure::Refused); - if let Ok(resp) = &response { + if let Ok(AvailabilityFetchingResponse::Chunk(resp)) = &response { if self.valid_chunks.contains(&resp.chunk) { valid_responses += 1; } @@ -131,9 +251,10 @@ impl TestRun { return (valid_responses == 0) && self.valid_chunks.is_empty() } AllMessages::AvailabilityStore( - AvailabilityStoreMessage::StoreChunk { chunk, .. } + AvailabilityStoreMessage::StoreChunk { chunk, tx, .. } ) => { assert!(self.valid_chunks.contains(&chunk.chunk)); + tx.send(Ok(())).expect("Answering fetching task should work"); return true } _ => { @@ -144,6 +265,7 @@ impl TestRun { } } +/// Get a `RunningTask` filled with dummy values. fn get_test_running_task() -> (RunningTask, mpsc::Receiver) { let (tx,rx) = mpsc::channel(0); @@ -164,3 +286,30 @@ fn get_test_running_task() -> (RunningTask, mpsc::Receiver) { rx ) } + +fn get_valid_chunk_data() -> (Hash, ErasureChunk) { + let fake_validator_count = 10; + let persisted = PersistedValidationData { + parent_head: HeadData(vec![7, 8, 9]), + relay_parent_number: Default::default(), + max_pov_size: 1024, + relay_parent_storage_root: Default::default(), + }; + let pov_block = PoV { + block_data: BlockData(vec![45, 46, 47]), + }; + let available_data = AvailableData { + validation_data: persisted, pov: Arc::new(pov_block), + }; + let chunks = obtain_chunks(fake_validator_count, &available_data).unwrap(); + let branches = branches(chunks.as_ref()); + let root = branches.root(); + let chunk = branches.enumerate() + .map(|(index, (proof, chunk))| ErasureChunk { + chunk: chunk.to_vec(), + index: ValidatorIndex(index as _), + proof, + }) + .next().expect("There really should be 10 chunks."); + (root, chunk) +} From a0e01ec1f91e136856c56a558ff63949f098d3af Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 26 Feb 2021 15:08:02 +0100 Subject: [PATCH 60/60] Fix network bridge test. 
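
For context on the fetch-task tests above: the requester pops backers off the end of the group, and `report_bad` puts reported validators at the front so they are tried last on the next fetch, which is why Alice, listed first in `task_stores_valid_chunk_if_there_is_one`, is deliberately the last one asked. A simplified, self-contained sketch of that re-ordering, with plain strings standing in for validator ids:

    /// Simplified version of the re-ordering: drop the reported validators
    /// from wherever they are and put them at the front, so `pop()` reaches
    /// them last.
    fn report_bad(group: &mut Vec<&'static str>, bad: &[&'static str]) {
        group.retain(|v| !bad.contains(v));
        let mut reordered = bad.to_vec();
        reordered.extend(group.iter().copied());
        *group = reordered;
    }

    fn main() {
        let mut group = vec!["alice", "bob", "charlie"];
        // Fetching pops from the back, so charlie would be asked first.
        assert_eq!(group.last(), Some(&"charlie"));

        // charlie and bob failed to provide the chunk; move them to the front.
        report_bad(&mut group, &["charlie", "bob"]);
        assert_eq!(group, vec!["charlie", "bob", "alice"]);
        // On the next fetch, alice (now at the back) is asked first.
        assert_eq!(group.last(), Some(&"alice"));
    }
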
--- node/network/bridge/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/network/bridge/src/lib.rs b/node/network/bridge/src/lib.rs index c50cf49c2ce4..25baae8b4671 100644 --- a/node/network/bridge/src/lib.rs +++ b/node/network/bridge/src/lib.rs @@ -1524,7 +1524,7 @@ mod tests { fn spread_event_to_subsystems_is_up_to_date() { // Number of subsystems expected to be interested in a network event, // and hence the network event broadcasted to. - const EXPECTED_COUNT: usize = 6; + const EXPECTED_COUNT: usize = 5; let mut cnt = 0_usize; for msg in AllMessages::dispatch_iter(NetworkBridgeEvent::PeerDisconnected(PeerId::random())) {