This repository was archived by the owner on Nov 15, 2023. It is now read-only.
Merged
Changes from 1 commit
Commits
66 commits
6f2e0e9
WIP
eskimor Feb 4, 2021
da850f7
availability distribution, still very wip.
eskimor Feb 9, 2021
fbf0ec1
Some docs on what I intend to do.
eskimor Feb 9, 2021
ac543c1
Checkpoint of session cache implementation
eskimor Feb 10, 2021
07f6bc3
More work, mostly on cache
eskimor Feb 11, 2021
ef84ea5
Only derive MallocSizeOf and Debug for std.
eskimor Feb 12, 2021
1e35804
availability-distribution: Cache feature complete.
eskimor Feb 12, 2021
d8fda81
Sketch out logic in `FetchTask` for actual fetching.
eskimor Feb 13, 2021
47036c9
Format cleanup.
eskimor Feb 13, 2021
4ad902f
More format fixes.
eskimor Feb 13, 2021
fee9476
Almost feature complete `fetch_task`.
eskimor Feb 15, 2021
b9aa906
Finish FetchTask so far.
eskimor Feb 15, 2021
a65562f
Directly use AuthorityDiscoveryId in protocol and cache.
eskimor Feb 16, 2021
4a43561
Resolve `AuthorityDiscoveryId` on sending requests.
eskimor Feb 16, 2021
6543b30
Rework fetch_task
eskimor Feb 17, 2021
256e559
From<u32> implementation for `ValidatorIndex`.
eskimor Feb 17, 2021
f8d5fef
Fixes and more integration work.
eskimor Feb 17, 2021
5e77fb4
Make session cache proper lru cache.
eskimor Feb 18, 2021
72704ee
Use proper lru cache.
eskimor Feb 18, 2021
60a2faf
Requester finished.
eskimor Feb 18, 2021
452b55f
ProtocolState -> Requester
eskimor Feb 18, 2021
2b9b983
Cleanup + fixes.
eskimor Feb 18, 2021
d683f10
Remove unused functions
eskimor Feb 18, 2021
d7a8a31
availability-distribution responding side.
eskimor Feb 19, 2021
3fed607
Cleanup + Fixes.
eskimor Feb 19, 2021
39d6bc2
More fixes.
eskimor Feb 19, 2021
49b1764
More fixes.
eskimor Feb 19, 2021
a141330
Some docs.
eskimor Feb 19, 2021
fad4586
Docs.
eskimor Feb 19, 2021
e617e91
Fix reporting of bad guys.
eskimor Feb 19, 2021
789a3e9
Merge branch 'master' into rk-availability-distribution-2306
eskimor Feb 19, 2021
a4eef9b
Fix tests
eskimor Feb 19, 2021
ea5f6a4
Make all tests compile.
eskimor Feb 19, 2021
00e2f69
Fix test.
eskimor Feb 19, 2021
c837d98
Cleanup + get rid of some warnings.
eskimor Feb 19, 2021
09fa9fe
Merge branch 'master' into rk-availability-distribution-2306
eskimor Feb 22, 2021
8945fbb
state -> requester
eskimor Feb 22, 2021
c9984fb
Mostly doc fixes.
eskimor Feb 22, 2021
7707759
Fix test suite.
eskimor Feb 22, 2021
e7623d4
Get rid of now redundant message types.
eskimor Feb 22, 2021
e8d7e44
WIP
eskimor Feb 22, 2021
5fb8418
Rob's review remarks.
eskimor Feb 22, 2021
41f60e4
Merge branch 'rk-drop-redundant-messages-2306' into rk-availability-d…
eskimor Feb 22, 2021
9780f3a
Fix test suite.
eskimor Feb 22, 2021
5bbcea4
core.relay_parent -> leaf for session request.
eskimor Feb 22, 2021
b792a89
Style fix.
eskimor Feb 22, 2021
75e6af8
Decrease request timeout.
eskimor Feb 22, 2021
ca7c182
Merge branch 'rk-metrics-2306' into rk-availability-distribution-2306
eskimor Feb 23, 2021
53fdeb3
Cleanup obsolete errors.
eskimor Feb 23, 2021
ce21a10
Metrics + don't fail on non fatal errors.
eskimor Feb 23, 2021
64d7246
requester.rs -> requester/mod.rs
eskimor Feb 23, 2021
2a9650f
Panic on invalid BadValidator report.
eskimor Feb 23, 2021
4d05d00
Fix indentation.
eskimor Feb 23, 2021
aadc80f
Use typed default timeout constant.
eskimor Feb 23, 2021
e45f61c
Make channel size 0, as each sender gets one slot anyways.
eskimor Feb 23, 2021
43dfd1c
Fix incorrect metrics initialization.
eskimor Feb 23, 2021
e262782
Merge branch 'master' into rk-availability-distribution-2306
eskimor Feb 23, 2021
5353157
Fix build after merge.
eskimor Feb 23, 2021
ff94444
More fixes.
eskimor Feb 23, 2021
6b71e54
Hopefully valid metrics names.
eskimor Feb 24, 2021
190adaa
Better metrics names.
eskimor Feb 25, 2021
8901344
Some tests that already work.
eskimor Feb 25, 2021
1d29b5c
Slightly better docs.
eskimor Feb 25, 2021
83ff666
Some more tests.
eskimor Feb 26, 2021
e9210e5
Merge branch 'master' into rk-availability-distribution-2306
eskimor Feb 26, 2021
a0e01ec
Fix network bridge test.
eskimor Feb 26, 2021
ProtocolState -> Requester
Also make sure to not fetch our own chunk.
eskimor committed Feb 18, 2021
commit 452b55f2cd7582f10ebaa25bf4dbd7818dbc0dae
8 changes: 6 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 0 additions & 1 deletion node/network/availability-distribution/Cargo.toml
@@ -19,7 +19,6 @@ sp-application-crypto = { git = "https://github.com/paritytech/substrate", branc
sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", features = ["std"] }
sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" }
thiserror = "1.0.23"
itertools = "0.10.0"
rand = "0.8.3"
lru = "0.6.5"

12 changes: 4 additions & 8 deletions node/network/availability-distribution/src/lib.rs
@@ -32,13 +32,9 @@ mod error;
pub use error::Error;
use error::Result;

/// The actual implementation of running availability distribution.
mod state;
/// State of a running availability-distribution subsystem.
use state::ProtocolState;

/// A task fetching a particular chunk.
mod fetch_task;
/// `Requester` taking care of requesting chunks for candidates pending availability.
mod requester;
use requester::Requester;

/// Cache for session information.
mod session_cache;
@@ -85,7 +81,7 @@ impl AvailabilityDistributionSubsystem {
where
Context: SubsystemContext<Message = AvailabilityDistributionMessage> + Sync + Send,
{
let mut state = ProtocolState::new(self.keystore.clone()).fuse();
let mut state = Requester::new(self.keystore.clone()).fuse();
loop {
let action = {
let mut subsystem_next = ctx.recv().fuse();
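The lib.rs change above swaps `ProtocolState` for `Requester` but keeps the run-loop pattern: the requester is driven as a fused stream alongside incoming subsystem messages. A minimal, self-contained sketch of that pattern using plain `futures` primitives (the stream contents, `from_overseer`, and the printed strings are illustrative stand-ins, not the subsystem's real types):

    use futures::{channel::mpsc, executor::block_on, select, SinkExt, StreamExt};

    fn main() {
        block_on(async {
            // Stand-in for `Requester`: a fused stream of actions to forward.
            let mut requester =
                futures::stream::iter(vec!["fetch chunk A", "fetch chunk B"]).fuse();

            // Stand-in for `ctx.recv()`: messages coming from the overseer.
            let (mut tx, rx) = mpsc::channel::<&str>(1);
            let mut from_overseer = rx.fuse();
            tx.send("ActiveLeavesUpdate").await.unwrap();
            drop(tx); // Closing the channel ends the loop below.

            loop {
                select! {
                    msg = from_overseer.next() => match msg {
                        Some(m) => println!("overseer message: {}", m),
                        None => break, // Overseer gone, shut down.
                    },
                    action = requester.next() => {
                        if let Some(a) = action {
                            println!("requester asks to: {}", a);
                        }
                    },
                }
            }
        });
    }

Both sources are fused so that `select!` can safely skip whichever side has already terminated.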
@@ -14,44 +14,8 @@
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.

//! `ProtocolState` representing a running availability distribution subsystem.
//!
//! We keep track of [`FetchTask`]s, which get created on [`ActiveLeavesUpdate`]s for each occupied
//! core in the leaves, if we have not created one before. We keep track of the relay parents
//! for which a `FetchTask` is considered live (the corresponding slot is occupied with the
//! candidate being fetched). Once there is no relay parent left for which that task is considered live,
//! it gets removed.
//!
//! We keep that task around as long as its corresponding candidate is considered pending
//! availability, even if we fetched our chunk already. This is so we won't fetch our piece again,
//! just because the candidate is still pending availability in the next block.
//!
//! We are also dependent on session information. We need to know which validators are in a
//! particular validator group, backing our candidate, so we can request our erasure chunk from
//! them.
//!
//! We want to randomize the list of validators in each group, so we get a
//! random order of validators to try to get the chunk from. This is to ensure load balancing: each
//! requesting validator should have a different order, thus trying different validators.
//!
//! But we would like to keep that randomized order around for an entire session, so our particular
//! validator will always request from the same validators, thus making sure it will find an open
//! network connection on each request.
//!
//! (TODO: What to do on session boundaries? Initial delay acceptable? Connect with some fake
//! request to future validators? Use a peer set after all and connect that to the future session?)
//!
//! So we need to keep some customized session info around, which seems to be a good idea for
//! performance reasons anyway. That's where `SessionCache` comes into play. It is used to keep
//! session information around as long as we need it. But how long do we need it? How do we manage
//! that cache? We can't rely on `ActiveLeavesUpdate` heads alone, as we might get occupied slots
//! for heads we never got an `ActiveLeavesUpdate` for. Therefore we don't populate the session
//! cache with sessions our leaves correspond to, but directly with the sessions of the relay
//! parents of our `CandidateDescriptor`s. So, it's clear how to populate the cache, but when can we
//! get rid of cached session information? It is for sure safe to do so when there is no
//! candidate/FetchTask around anymore which references it. Thus the cache simply consists of
//! `Weak` pointers to the actual session infos and the `FetchTask`s keep `Rc`s, therefore we know
//! exactly when we can get rid of a cache entry by means of the Weak pointer evaluating to `None`.
//! Requester takes care of requesting erasure chunks for candidates that are pending
//! availability.

use std::collections::{
hash_map::{Entry, HashMap},
@@ -83,21 +47,26 @@ use polkadot_subsystem::{
SubsystemContext, SubsystemError,
};

use super::{
error::recv_runtime,
fetch_task::{FetchTask, FetchTaskConfig, FromFetchTask},
session_cache::SessionCache,
Result, LOG_TARGET,
};
use super::{error::recv_runtime, session_cache::SessionCache, Result, LOG_TARGET};

/// A task fetching a particular chunk.
mod fetch_task;
use fetch_task::{FetchTask, FetchTaskConfig, FromFetchTask};

/// A running instance of this subsystem.
pub struct ProtocolState {
/// Requester takes care of requesting erasure chunks from backing groups and stores them in the
/// av store.
///
/// It implements a stream that needs to be advanced in order to make progress.
pub struct Requester {
/// Candidates we need to fetch our chunk for.
///
/// We keep those around as long as a candidate is pending availability on some leaf, so we
/// won't fetch chunks multiple times.
fetches: HashMap<CandidateHash, FetchTask>,

/// Localized information about sessions we are currently interested in.
///
/// This is usually the current one and at session boundaries also the last one.
/// This is the current one and the last one.
session_cache: SessionCache,

/// Sender to be cloned for `FetchTask`s.
@@ -107,11 +76,15 @@ pub struct ProtocolState {
rx: mpsc::Receiver<FromFetchTask>,
}

impl ProtocolState {
impl Requester {
/// Create a new `Requester`.
///
/// You must feed it with `ActiveLeavesUpdate`s via `update_fetching_heads` and make it progress
/// by advancing the stream.
pub(crate) fn new(keystore: SyncCryptoStorePtr) -> Self {
// All we do is forward messages, so no need to make this big.
let (tx, rx) = mpsc::channel(1);
ProtocolState {
Requester {
fetches: HashMap::new(),
session_cache: SessionCache::new(keystore),
tx,
@@ -120,7 +93,7 @@ impl ProtocolState {
}
/// Update heads that need availability distribution.
///
/// For all active heads we will be fetching our chunk for availability distribution.
/// For all active heads we will be fetching our chunks for availability distribution.
pub(crate) async fn update_fetching_heads<Context>(
&mut self,
ctx: &mut Context,
@@ -159,8 +132,6 @@ impl ProtocolState {

/// Stop requesting chunks for obsolete heads.
///
/// Returns relay_parents which became irrelevant for availability fetching (are not
/// referenced by any candidate anymore).
fn stop_requesting_chunks(&mut self, obsolete_leaves: impl Iterator<Item = Hash>) {
let obsolete_leaves: HashSet<_> = obsolete_leaves.into_iter().collect();
self.fetches.retain(|&c_hash, task| {
@@ -213,7 +184,7 @@ impl ProtocolState {
}
}

impl Stream for ProtocolState {
impl Stream for Requester {
type Item = Result<AllMessages>;

fn poll_next(
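The `stop_requesting_chunks` hunk above relies on a simple retention rule: a `FetchTask` survives as long as at least one non-obsolete leaf still references it. A standalone sketch of that rule with simplified stand-ins (`Task`, string candidate keys, integer leaf ids) in place of the real `FetchTask`/`CandidateHash`/`Hash` types:

    use std::collections::{HashMap, HashSet};

    struct Task {
        // Leaves for which this fetch is still relevant.
        live_in: HashSet<u32>,
    }

    // Drop every task that is no longer live in any non-obsolete leaf.
    fn stop_requesting(fetches: &mut HashMap<&'static str, Task>, obsolete: &HashSet<u32>) {
        fetches.retain(|_, task| {
            // Forget the obsolete leaves ...
            task.live_in.retain(|leaf| !obsolete.contains(leaf));
            // ... and keep the task only while some live leaf still references it.
            !task.live_in.is_empty()
        });
    }

    fn main() {
        let mut fetches = HashMap::new();
        fetches.insert("candidate-a", Task { live_in: vec![1u32, 2].into_iter().collect() });
        fetches.insert("candidate-b", Task { live_in: vec![2u32].into_iter().collect() });

        let obsolete: HashSet<u32> = vec![2u32].into_iter().collect();
        stop_requesting(&mut fetches, &obsolete);

        // candidate-a survives (still live in leaf 1), candidate-b is dropped.
        assert!(fetches.contains_key("candidate-a"));
        assert!(!fetches.contains_key("candidate-b"));
    }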
@@ -43,7 +43,7 @@ use polkadot_subsystem::{
Subsystem, SubsystemContext, SubsystemError, SubsystemResult,
};

use super::{
use crate::{
error::{Error, Result},
session_cache::{BadValidators, SessionInfo},
LOG_TARGET,
@@ -54,7 +54,7 @@ use super::{
/// This exists to separate preparation of a `FetchTask` from actually starting it, which is
/// beneficial as it allows for taking session info by reference.
pub struct FetchTaskConfig {
prepared_running: RunningTask,
prepared_running: Option<RunningTask>,
live_in: HashSet<Hash>,
}

@@ -129,7 +129,17 @@ impl FetchTaskConfig {
sender: mpsc::Sender<FromFetchTask>,
session_info: &SessionInfo,
) -> Self {
let prepared_running = RunningTask {
let live_in = vec![leaf].into_iter().collect();

// Don't run tasks for our backing group:
if session_info.our_group == core.group_responsible {
return FetchTaskConfig {
live_in,
prepared_running: None,
};
}

let prepared_running = RunningTask {
session_index: session_info.session_index,
group_index: core.group_responsible,
group: session_info.validator_groups.get(core.group_responsible.0 as usize).expect("The responsible group of a candidate should be available in the corresponding session. qed.").clone(),
@@ -142,8 +152,8 @@ impl FetchTaskConfig {
sender,
};
FetchTaskConfig {
live_in: vec![leaf].into_iter().collect(),
prepared_running,
live_in,
prepared_running: Some(prepared_running),
}
}
}
@@ -158,14 +168,24 @@ impl FetchTask {
prepared_running,
live_in,
} = config;
let (handle, kill) = oneshot::channel();
ctx.spawn("chunk-fetcher", prepared_running.run(kill).boxed())
.await
.map_err(|e| Error::SpawnTask(e))?;
Ok(FetchTask {
live_in,
state: FetchedState::Started(handle),
})

if let Some(running) = prepared_running {
let (handle, kill) = oneshot::channel();

ctx.spawn("chunk-fetcher", running.run(kill).boxed())
.await
.map_err(|e| Error::SpawnTask(e))?;

Ok(FetchTask {
live_in,
state: FetchedState::Started(handle),
})
} else {
Ok(FetchTask {
live_in,
state: FetchedState::Canceled,
})
}
}

/// Add the given leaf to the relay parents which are making this task relevant.
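The switch to `prepared_running: Option<RunningTask>` encodes the commit's "make sure to not fetch our own chunk" rule: when our backing group is the one responsible for the candidate, no running task is prepared and `FetchTask::start` ends up in the canceled state. A simplified sketch of that decision, with hypothetical stand-in types rather than the real `FetchTaskConfig`:

    #[derive(PartialEq, Eq, Clone, Copy, Debug)]
    struct GroupIndex(u32);

    // Stand-in for the prepared background task.
    #[derive(Debug)]
    struct RunningTask {
        group_responsible: GroupIndex,
    }

    // Stand-in for `FetchTaskConfig`: `prepared_running` is `None` when our own
    // backing group is responsible, since we already have the data via PoV distribution.
    struct FetchConfig {
        prepared_running: Option<RunningTask>,
    }

    fn make_config(our_group: GroupIndex, group_responsible: GroupIndex) -> FetchConfig {
        if our_group == group_responsible {
            return FetchConfig { prepared_running: None };
        }
        FetchConfig {
            prepared_running: Some(RunningTask { group_responsible }),
        }
    }

    fn main() {
        // Our own group is responsible: nothing to fetch.
        assert!(make_config(GroupIndex(0), GroupIndex(0)).prepared_running.is_none());

        // Another group is responsible: a task is prepared against that group.
        let cfg = make_config(GroupIndex(0), GroupIndex(3));
        assert_eq!(cfg.prepared_running.unwrap().group_responsible, GroupIndex(3));
    }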
32 changes: 24 additions & 8 deletions node/network/availability-distribution/src/session_cache.rs
@@ -80,10 +80,9 @@ pub struct SessionInfo {
/// Information about ourself:
pub our_index: ValidatorIndex,

//// Remember to which group we blong, so we won't start fetching chunks for candidates we
//// backed our selves.
// TODO: Implement this:
// pub our_group: GroupIndex,
/// Remember to which group we belong, so we won't start fetching chunks for those
/// candidates (we should already have them via PoV distribution).
pub our_group: GroupIndex,
}

/// Report of bad validators.
@@ -133,10 +132,10 @@ impl SessionCache {
ctx: &mut Context,
parent: Hash,
with_info: F,
) -> Result<Option<R>>
where
) -> Result<Option<R>>
where
Context: SubsystemContext,
F: FnOnce(&SessionInfo) -> R
F: FnOnce(&SessionInfo) -> R,
{
let session_index = match self.session_index_cache.get(&parent) {
Some(index) => *index,
@@ -150,7 +149,7 @@
};

if let Some(info) = self.session_info_cache.get(&session_index) {
return Ok(Some(with_info(info)))
return Ok(Some(with_info(info)));
}

if let Some(info) = self
@@ -205,6 +204,22 @@
.ok_or(Error::NoSuchSession(session_index))?;

if let Some(our_index) = self.get_our_index(validators).await {
// Get our group index:
let our_group = validator_groups
.iter()
.enumerate()
.find_map(|(i, g)| {
g.iter().find_map(|v| {
if *v == our_index {
Some(GroupIndex(i as u32))
} else {
None
}
})
})
// TODO: Make sure this is correct and should be enforced:
.expect("Every validator should be in a validator group. qed.");

// Shuffle validators in groups:
let mut rng = thread_rng();
for g in validator_groups.iter_mut() {
@@ -228,6 +243,7 @@
validator_groups,
our_index,
session_index,
our_group,
};
return Ok(Some(info));
}
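The `our_group` lookup added to `session_cache.rs` is a search over the enumerated validator groups for our own validator index. A standalone sketch with newtype stand-ins for `ValidatorIndex`/`GroupIndex`, using `contains` in place of the nested `find_map` (behaviorally equivalent):

    #[derive(PartialEq, Eq, Debug, Clone, Copy)]
    struct ValidatorIndex(u32);
    #[derive(PartialEq, Eq, Debug, Clone, Copy)]
    struct GroupIndex(u32);

    // Find the index of the validator group that contains our validator index.
    fn our_group(groups: &[Vec<ValidatorIndex>], our_index: ValidatorIndex) -> Option<GroupIndex> {
        groups.iter().enumerate().find_map(|(i, group)| {
            if group.contains(&our_index) {
                Some(GroupIndex(i as u32))
            } else {
                None
            }
        })
    }

    fn main() {
        let groups = vec![
            vec![ValidatorIndex(0), ValidatorIndex(1)],
            vec![ValidatorIndex(2), ValidatorIndex(3)],
        ];

        // Validator 3 sits in the second group.
        assert_eq!(our_group(&groups, ValidatorIndex(3)), Some(GroupIndex(1)));

        // A validator outside every group yields `None`; the PR currently `expect`s
        // on this case, with a TODO on whether that invariant should be enforced.
        assert_eq!(our_group(&groups, ValidatorIndex(9)), None);
    }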