Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
176 commits
Select commit Hold shift + click to select a range
051328a
Draft of RecoveryStrategy based on linked enums
alindima Sep 6, 2023
166aae5
add copright header
alindima Sep 8, 2023
518d8fd
fix clippy
alindima Sep 8, 2023
dae34a5
Refactor RecoveryStrategy using dynamic dispatch
alindima Sep 8, 2023
640c2d5
address comments
alindima Sep 12, 2023
ac18bab
Merge branch 'master' into alindima/refactor-availability-recovery-st…
alindima Sep 12, 2023
2269f76
erasure-coding: add algorithm for systematic recovery
alindima Sep 12, 2023
dca9ce4
WIP
alindima Sep 13, 2023
9eeb93a
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Sep 13, 2023
6f58c3a
continue implementation
alindima Sep 14, 2023
a805a85
Fix some tests
alindima Sep 14, 2023
d83bf50
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Sep 14, 2023
2a5d6d5
Merge remote-tracking branch 'origin/master' into alindima/refactor-a…
alindima Sep 14, 2023
f3d407b
replace hashmap of chunks with btreemap
alindima Sep 15, 2023
67841bd
add chunk indices cache to av-recovery
alindima Sep 15, 2023
3810eab
some more improvements
alindima Sep 15, 2023
bf39ba0
don't use the backing group if chunk size query failed
alindima Sep 15, 2023
34408b5
add ImmediateError to chunks recovery strategy
alindima Sep 15, 2023
b68e671
modify and add metrics
alindima Sep 15, 2023
1569e49
more review comments
alindima Sep 18, 2023
d5c32d1
fix test
alindima Sep 18, 2023
cb42bef
rollback to using TryConnect for fetch chunks
alindima Sep 18, 2023
06f00a2
add runtime API knob for shuffling enablement
alindima Sep 18, 2023
2b58895
av-recovery and av-distr: use relay parent from candidate_receipt
alindima Sep 18, 2023
f860b7d
add avail_chunk_shuffling params to HostConfiguration
alindima Sep 18, 2023
b82b2a0
replace BypassAvStore variant with a separate flag
alindima Sep 19, 2023
8585628
move requesting_chunks to the strategy
alindima Sep 19, 2023
2f9b767
Merge remote-tracking branch 'origin/master' into alindima/refactor-a…
alindima Sep 19, 2023
824f7e3
Merge remote-tracking branch 'origin/alindima/refactor-availability-r…
alindima Sep 19, 2023
818d9c0
fix tests
alindima Sep 19, 2023
6b8afcb
add erasure-coding benchmark for systematic recovery
alindima Sep 19, 2023
ce633b4
fix clippy
alindima Sep 20, 2023
c1a5dbf
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Sep 20, 2023
0fb256a
av-distribution is not sending av-recovery messages
alindima Sep 20, 2023
b2c92b3
fix clippy
alindima Sep 20, 2023
3c3371a
add client_features runtime API
alindima Sep 22, 2023
3d82eb9
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Sep 22, 2023
d57396b
rustfmt
alindima Sep 22, 2023
0a46a21
fix test
alindima Sep 22, 2023
e2ce0cd
add client_features to westend, fix fmt and clippy
alindima Sep 22, 2023
cc06a71
try fixing clippy again
alindima Sep 22, 2023
a83b178
fix copy-paste mistake
alindima Sep 26, 2023
6e06d27
fix metrics and logs
alindima Sep 27, 2023
ce10f68
add new shuffling algorithm
alindima Sep 27, 2023
b3ea7c9
add newtype for ChunkIndex
alindima Sep 27, 2023
926bcb0
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Sep 27, 2023
6b6e924
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Sep 27, 2023
f28e01e
add v10 config migration to westend
alindima Sep 27, 2023
21811ef
replace u8 ClientFeatures with u64
alindima Sep 28, 2023
ad43e4a
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Sep 28, 2023
67954e6
some fixes to get clippy to pass
alindima Sep 28, 2023
c93985b
use Pallet::current_storage_version()::put() instead of explicitly
alindima Sep 28, 2023
201b09e
replace RecoveryError::Unavailable with Invalid
alindima Sep 28, 2023
9c51bf2
add tests to availability-distribution
alindima Sep 28, 2023
fabc044
add comma
alindima Sep 28, 2023
8ee0027
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Oct 3, 2023
4283214
fix migration test
alindima Oct 3, 2023
77d5370
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Oct 9, 2023
a9ef2a4
add some more tests
alindima Oct 9, 2023
2035f30
add more tests and small fix
alindima Oct 11, 2023
eef1e14
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Oct 16, 2023
fac014c
bump novelpoly
alindima Oct 16, 2023
7a71f0f
av-recovery: revert to with_chunks_if_pov_large for testing purposes
alindima Oct 16, 2023
6973295
improve migrate_to_v10 test
alindima Oct 16, 2023
1af0657
address some review comments
alindima Oct 16, 2023
38e1361
some more tests
alindima Oct 16, 2023
1d825df
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Oct 16, 2023
1318a21
enable systematic recovery for testing purposes
alindima Oct 16, 2023
a98cdd6
don't request chunks that past strategies deemed not available
alindima Oct 18, 2023
9dc87ae
more tests
alindima Oct 19, 2023
c462e25
fix infinite request loop for network errors.
alindima Oct 19, 2023
fc621f9
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Oct 19, 2023
716606e
add extensive unit tests for av-recovery
alindima Oct 19, 2023
c0b1c6c
fix clippy
alindima Oct 19, 2023
862501b
runtime config: enable AVAILABILITY_CHUNK_SHUFFLING for new chains
alindima Oct 19, 2023
91a52fa
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Oct 24, 2023
8b0d583
use fatality in av-recovery
alindima Oct 24, 2023
25ba4fd
bump SYSTEMATIC_CHUNKS_REQ_RETRY_LIMIT to 2
alindima Oct 24, 2023
850c0a3
rename historical_errors to recorded_errors
alindima Oct 24, 2023
ab4d8d1
deduplicate get_block_number
alindima Oct 24, 2023
d9282ad
address some review comments
alindima Oct 24, 2023
9deec5f
more tests
alindima Oct 24, 2023
8494496
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Oct 25, 2023
5524385
remove unused env_logger from test
alindima Oct 25, 2023
d2babf7
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Oct 31, 2023
13c4253
small metrics and logging improvements
alindima Nov 3, 2023
b8792ff
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Nov 3, 2023
81ebaa8
add more explicit error message on decode
alindima Nov 7, 2023
ef893c3
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Nov 7, 2023
f681e1e
add more metrics for full data requests
alindima Nov 8, 2023
60f0e79
move ChunkIndexCacheRegistry to its own module
alindima Nov 8, 2023
90fdec8
fix test compilation
alindima Nov 8, 2023
68ad6a6
add license header
alindima Nov 8, 2023
4e22ef7
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Nov 8, 2023
a943f25
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Nov 10, 2023
6e34e90
fix cumulus pov_recovery with RPC node
alindima Nov 10, 2023
5feed4a
fix test
alindima Nov 10, 2023
949e732
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Nov 14, 2023
db1225b
add more tests for #2287
alindima Nov 14, 2023
b4a125b
add backers as backup for requesting systematic chunks
alindima Nov 14, 2023
d151626
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Nov 14, 2023
e58a405
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Nov 20, 2023
0906bf0
fix clippy and remove unneeded dep
alindima Nov 20, 2023
0ed37a8
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Dec 12, 2023
2d0b587
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Dec 19, 2023
894880b
update lockfile
alindima Dec 19, 2023
fada5d9
integrate reed-solomon from master git branch
alindima Dec 19, 2023
145097e
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Dec 20, 2023
4233871
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Jan 11, 2024
965c139
Adapt code to new design from RFC
alindima Jan 11, 2024
4530caa
fix av-store tests
alindima Jan 18, 2024
1ee008c
don't allow systematic recovery if chunk mapping is disabled
alindima Jan 18, 2024
46486db
launch systematic recovery during approval voting
alindima Jan 18, 2024
376619e
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Jan 18, 2024
199bc88
add zombienet test for network-level compatibility
alindima Jan 18, 2024
d923269
add systematic recovery to subsystem-bench
alindima Jan 19, 2024
bc22b81
fix clippy
alindima Jan 19, 2024
3edd33b
fix some backing tests
alindima Jan 19, 2024
c9ab410
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Jan 19, 2024
2d2d868
random small fixes
alindima Jan 19, 2024
557e410
try fixing zombienet test
alindima Jan 19, 2024
3df708b
add v2 protocol to cumulus
alindima Jan 19, 2024
e6de96a
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Jan 26, 2024
278028b
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Jan 26, 2024
65c8ecf
fix clippy
alindima Jan 26, 2024
951119e
address some review feedback
alindima Jan 26, 2024
c94e7d5
some more comments
alindima Jan 26, 2024
e4cbf1d
fill prdoc
alindima Jan 26, 2024
2f21035
zombienet tests
alindima Jan 26, 2024
33d6162
try fixing zombienet tests
alindima Jan 29, 2024
ad975a6
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Jan 29, 2024
eddae38
update implementer's guide
alindima Jan 30, 2024
c2f0f23
markdown format
alindima Jan 30, 2024
9b49d40
move erasure_task_tx to the common recovery params
alindima Jan 30, 2024
8f027fb
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Feb 9, 2024
826208f
address some review comments
alindima Feb 9, 2024
d25cc1c
try fixing zombienet tests
alindima Feb 9, 2024
4e958ba
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Feb 9, 2024
dbc27b3
more zombienet
alindima Feb 9, 2024
6aa3b16
zombienet
alindima Feb 12, 2024
bf93b63
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Feb 12, 2024
2316316
fix yaml formatting
alindima Feb 12, 2024
0c57295
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Feb 12, 2024
df8a096
fix import
alindima Feb 12, 2024
486cf64
fix bench
alindima Feb 12, 2024
70e8840
clippy
alindima Feb 12, 2024
ee93f68
fix image name
alindima Feb 12, 2024
8a2931c
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Feb 12, 2024
7d9e21a
try another image for the zombienet test
alindima Feb 12, 2024
9000f5c
Use cumulus image for collator
pepoviola Feb 13, 2024
e2a95c2
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Feb 26, 2024
eeab22a
fix yaml format
alindima Feb 27, 2024
e39ea78
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Mar 11, 2024
a57f5fd
fix merge commit
alindima Mar 11, 2024
6948ea1
fix bench
alindima Mar 11, 2024
91a6180
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima Mar 22, 2024
da66e88
re-add enable-node-feature script
alindima Mar 22, 2024
d20ced9
fix zombienet tests
alindima Mar 22, 2024
3ace495
Merge remote-tracking branch 'origin' into alindima/add-systematic-ch…
alindima May 9, 2024
78a7959
add semver to prdoc
alindima May 10, 2024
da8ca22
try fixing prdoc
alindima May 10, 2024
84ca508
Merge branch 'master' into alindima/add-systematic-chunks-av-recovery
alindima May 13, 2024
5dc877e
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima May 16, 2024
345d896
update lockfile
alindima May 21, 2024
01795b0
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima May 21, 2024
0ed57f8
unify fatality versions
alindima May 21, 2024
8acc2c1
some metrics and logs polishes
alindima May 24, 2024
7ddf494
more details to prdoc
alindima May 24, 2024
5bd966f
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima May 24, 2024
c721152
prdoc fixup
alindima May 24, 2024
6faa1ee
fix
alindima May 24, 2024
25dc880
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima May 27, 2024
5669eec
try to make markdown linter happy
alindima May 27, 2024
99f09e3
use higher glutton PoV sizes for tests
alindima May 27, 2024
ce08e60
bump zombienet version
pepoviola May 27, 2024
fdbc31f
Merge remote-tracking branch 'origin/master' into alindima/add-system…
alindima May 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
some more improvements
  • Loading branch information
alindima committed Sep 15, 2023
commit 3810eab0273e7ed3d8c7fd0868023d654b38bf58
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,9 @@ impl Requester {
})?;

if let Some(session_info) = session_info {
// TODO: optimise this n_validators calculation.
let n_validators =
session_info.validator_groups.iter().fold(0, |mut acc, group| {
acc += group.len();
session_info.validator_groups.iter().fold(0usize, |mut acc, group| {
acc = acc.saturating_add(group.len());
acc
});
let chunk_index = self
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,6 @@ impl SessionCache {
// Get our group index:
let our_group = info.validator_info.our_group;

// TODO: This shuffling is fine, as it only is used for knowing which validator to ask
// in order to get the chunk. The chunk index is coming from somewhere else.

// Shuffle validators in groups:
let mut rng = thread_rng();
for g in validator_groups.iter_mut() {
Expand Down
16 changes: 8 additions & 8 deletions polkadot/node/network/availability-recovery/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -519,16 +519,16 @@ async fn handle_recover<Context>(
let systematic_threshold =
systematic_recovery_threshold(session_info.validators.len())?;

let validators =
shuffling.into_iter().fold(BTreeMap::new(), |mut acc, (c_index, v_index)| {
if usize::try_from(c_index.0)
// Only get the validators according to the threshold.
let validators = shuffling
.clone()
.into_iter()
.filter(|(c_index, _)| {
usize::try_from(c_index.0)
.expect("usize is at least u32 bytes on all modern targets.") <
systematic_threshold
{
acc.insert(*c_index, *v_index);
}
acc
});
})
.collect();

recovery_strategies.push_back(Box::new(FetchSystematicChunks::new(
FetchSystematicChunksParams { validators, erasure_task_tx },
Expand Down
111 changes: 67 additions & 44 deletions polkadot/node/network/availability-recovery/src/task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ pub struct State {
received_chunks: BTreeMap<ChunkIndex, ErasureChunk>,
/// Collection of in-flight requests.
requesting_chunks:
FuturesUndead<Result<Option<ErasureChunk>, (ChunkIndex, ValidatorIndex, RequestError)>>,
FuturesUndead<(ChunkIndex, ValidatorIndex, Result<Option<ErasureChunk>, RequestError>)>,
}

impl State {
Expand Down Expand Up @@ -184,7 +184,15 @@ impl State {
let candidate_hash = &params.candidate_hash;
let already_requesting_count = self.requesting_chunks.len();

let mut requests = Vec::with_capacity(desired_requests_count - already_requesting_count);
let to_launch = desired_requests_count - already_requesting_count;
let mut requests = Vec::with_capacity(to_launch);

gum::trace!(
target: LOG_TARGET,
?candidate_hash,
"Attempting to launch {} requests",
to_launch
);

while self.requesting_chunks.len() < desired_requests_count {
if let Some((chunk_index, validator_index)) = validators.pop_back() {
Expand Down Expand Up @@ -212,12 +220,14 @@ impl State {

self.requesting_chunks.push(Box::pin(async move {
let _timer = timer;
match res.await {
let res = match res.await {
Ok(req_res::v1::ChunkFetchingResponse::Chunk(chunk)) =>
Ok(Some(chunk.recombine_into_chunk(&raw_request))),
Ok(req_res::v1::ChunkFetchingResponse::NoSuchChunk) => Ok(None),
Err(e) => Err((chunk_index, validator_index, e)),
}
Err(e) => Err(e),
};

(chunk_index, validator_index, res)
}));
} else {
break
Expand Down Expand Up @@ -247,19 +257,22 @@ impl State {
// Wait for all current requests to conclude or time-out, or until we reach enough chunks.
// We also declare requests undead, once `TIMEOUT_START_NEW_REQUESTS` is reached and will
// return in that case for `launch_parallel_requests` to fill up slots again.
while let Some(request_result) =
while let Some(res) =
self.requesting_chunks.next_with_timeout(TIMEOUT_START_NEW_REQUESTS).await
{
total_received_responses += 1;

let (chunk_index, validator_index, request_result) = res;

match request_result {
Ok(Some(chunk)) =>
if is_chunk_valid(params, &chunk) {
metrics.on_chunk_request_succeeded();
gum::trace!(
target: LOG_TARGET,
candidate_hash = ?params.candidate_hash,
chunk_index = ?chunk.index,
chunk_index = ?chunk_index,
?validator_index,
"Received valid chunk",
);
self.insert_chunk(chunk.index, chunk);
Expand All @@ -269,28 +282,36 @@ impl State {
},
Ok(None) => {
metrics.on_chunk_request_no_such_chunk();
gum::trace!(
target: LOG_TARGET,
candidate_hash = ?params.candidate_hash,
chunk_index = ?chunk_index,
?validator_index,
"Validator did not have the requested chunk",
);
error_count += 1;
},
Err((chunk_index, validator_index, e)) => {
Err(err) => {
error_count += 1;

gum::trace!(
target: LOG_TARGET,
candidate_hash= ?params.candidate_hash,
err = ?e,
?err,
chunk_index = ?chunk_index,
?validator_index,
?chunk_index,
"Failure requesting chunk",
);

match e {
match err {
RequestError::InvalidResponse(_) => {
metrics.on_chunk_request_invalid();

gum::debug!(
target: LOG_TARGET,
candidate_hash = ?params.candidate_hash,
err = ?e,
?err,
chunk_index = ?chunk_index,
?validator_index,
"Chunk fetching response was invalid",
);
Expand Down Expand Up @@ -328,7 +349,7 @@ impl State {
received_chunks_count = ?self.chunk_count(),
requested_chunks_count = ?self.requesting_chunks.len(),
threshold = ?params.threshold,
"Can conclude availability for a candidate",
"Can conclude availability recovery for a candidate",
);
break
}
Expand Down Expand Up @@ -558,16 +579,18 @@ impl<Sender: overseer::AvailabilityRecoverySenderTrait> RecoveryStrategy<Sender>
/// `RecoveryStrategy` that attempts to recover the systematic chunks from the validators that
/// hold them, in order to bypass the erasure code reconstruction step, which is costly.
pub struct FetchSystematicChunks {
/// Systematic recovery threshold.
threshold: usize,
validators: BTreeMap<ChunkIndex, ValidatorIndex>,
/// Validators that hold the systematic chunks.
validators: VecDeque<(ChunkIndex, ValidatorIndex)>,
/// Channel to the erasure task handler.
erasure_task_tx: futures::channel::mpsc::Sender<ErasureTask>,
}

/// Parameters needed for fetching systematic chunks.
pub struct FetchSystematicChunksParams {
/// Validators that hold the systematic chunks.
pub validators: BTreeMap<ChunkIndex, ValidatorIndex>,
pub validators: VecDeque<(ChunkIndex, ValidatorIndex)>,
/// Channel to the erasure task handler.
pub erasure_task_tx: futures::channel::mpsc::Sender<ErasureTask>,
}
Expand Down Expand Up @@ -656,7 +679,7 @@ impl FetchSystematicChunks {
target: LOG_TARGET,
candidate_hash = ?common_params.candidate_hash,
erasure_root = ?common_params.erasure_root,
"Data recovery from systematic chunks complete",
"Recovery from systematic chunks complete",
);

Ok(data)
Expand Down Expand Up @@ -711,11 +734,11 @@ impl<Sender: overseer::AvailabilityRecoverySenderTrait> RecoveryStrategy<Sender>
if !common_params.bypass_availability_store {
let local_chunk_indices = state.populate_from_av_store(common_params, sender).await;

for c_index in &local_chunk_indices {
for our_c_index in &local_chunk_indices {
// If we are among the systematic validators but hold an invalid chunk, we cannot
// perform the systematic recovery. Fall through to the next strategy.
if self.validators.contains_key(c_index) &&
!state.received_chunks.contains_key(c_index)
if self.validators.iter().find(|(c_index, _)| c_index == our_c_index).is_some() &&
!state.received_chunks.contains_key(our_c_index)
{
gum::debug!(
target: LOG_TARGET,
Expand All @@ -724,32 +747,31 @@ impl<Sender: overseer::AvailabilityRecoverySenderTrait> RecoveryStrategy<Sender>
requesting = %state.requesting_chunks.len(),
total_requesting = %state.requesting_chunks.total_len(),
n_validators = %common_params.n_validators,
chunk_index = ?our_c_index,
"Systematic chunk recovery is not possible. We are among the systematic validators but hold an invalid chunk",
);
return Err(RecoveryError::Unavailable)
}
}
}

let mut systematic_chunk_count = self
.validators
.iter()
.filter(|(c_index, _)| state.received_chunks.contains_key(c_index))
.count();
// Instead of counting the chunks we already have, perform the difference after we remove
// them from the queue.
let mut systematic_chunk_count = self.validators.len();

// No need to query the validators that have the chunks we already received.
self.validators
.retain(|c_index, _| !state.received_chunks.contains_key(c_index));
.retain(|(c_index, _)| !state.received_chunks.contains_key(c_index));

systematic_chunk_count -= self.validators.len();

// Safe to `take` here, as we're consuming `self` anyway and we're not using the
// `validators` field in other methods.
let mut validators_queue: VecDeque<_> =
std::mem::take(&mut self.validators).into_iter().collect();
let mut validators_queue: VecDeque<_> = std::mem::take(&mut self.validators);

loop {
// If received_chunks has `systematic_chunk_threshold` entries, attempt to recover the
// data. If that fails, or a re-encoding of it doesn't match the expected erasure root,
// return Err(RecoveryError::Invalid)
// data.
if systematic_chunk_count >= self.threshold {
return self.attempt_systematic_recovery(state, common_params).await
}
Expand All @@ -764,11 +786,11 @@ impl<Sender: overseer::AvailabilityRecoverySenderTrait> RecoveryStrategy<Sender>
target: LOG_TARGET,
candidate_hash = ?common_params.candidate_hash,
erasure_root = ?common_params.erasure_root,
received = %systematic_chunk_count,
%systematic_chunk_count,
requesting = %state.requesting_chunks.len(),
total_requesting = %state.requesting_chunks.total_len(),
n_validators = %common_params.n_validators,
threshold = ?self.threshold,
systematic_threshold = ?self.threshold,
"Data recovery is not possible",
);

Expand All @@ -783,7 +805,7 @@ impl<Sender: overseer::AvailabilityRecoverySenderTrait> RecoveryStrategy<Sender>
?common_params.candidate_hash,
?desired_requests_count,
total_received = ?systematic_chunk_count,
threshold = ?self.threshold,
systematic_threshold = ?self.threshold,
?already_requesting_count,
"Requesting systematic availability chunks for a candidate",
);
Expand All @@ -803,23 +825,25 @@ impl<Sender: overseer::AvailabilityRecoverySenderTrait> RecoveryStrategy<Sender>
&mut validators_queue,
|unrequested_validators,
in_flight_reqs,
// Don't use this chunk count, as it may contain non-systematic chunks.
_chunk_count,
error_count,
success_responses| {
let chunk_count = systematic_chunk_count + success_responses;
let is_unavailable = Self::is_unavailable(
unrequested_validators,
in_flight_reqs,
systematic_chunk_count + success_responses,
chunk_count,
self.threshold,
);

error_count > 0 ||
(systematic_chunk_count + success_responses) >= self.threshold ||
is_unavailable
error_count > 0 || chunk_count >= self.threshold || is_unavailable
},
)
.await;

systematic_chunk_count += total_responses - error_count;

// We can't afford any errors, as we need all the systematic chunks for this to work.
if error_count > 0 {
gum::debug!(
Expand All @@ -830,14 +854,12 @@ impl<Sender: overseer::AvailabilityRecoverySenderTrait> RecoveryStrategy<Sender>
requesting = %state.requesting_chunks.len(),
total_requesting = %state.requesting_chunks.total_len(),
n_validators = %common_params.n_validators,
threshold = ?self.threshold,
"Systematic chunk recovery is not possible. ",
systematic_threshold = ?self.threshold,
"Systematic chunk recovery is not possible. Got an error while requesting a chunk",
);

return Err(RecoveryError::Unavailable)
}

systematic_chunk_count += total_responses;
}
}
}
Expand All @@ -848,17 +870,15 @@ pub struct FetchChunks {
error_count: usize,
/// Total number of responses that have been received, including failed ones.
total_received_responses: usize,

/// A random shuffling of the validators which indicates the order in which we connect to the
/// validators and request the chunk from them.
/// The collection of chunk indices and the respective validators holding the chunks.
validators: VecDeque<(ChunkIndex, ValidatorIndex)>,

/// Channel to the erasure task handler.
erasure_task_tx: futures::channel::mpsc::Sender<ErasureTask>,
}

/// Parameters specific to the `FetchChunks` strategy.
pub struct FetchChunksParams {
/// The collection of chunk indices and the respective validators holding the chunks.
pub validators: VecDeque<(ChunkIndex, ValidatorIndex)>,
/// Channel to the erasure task handler.
pub erasure_task_tx: futures::channel::mpsc::Sender<ErasureTask>,
Expand All @@ -867,7 +887,10 @@ pub struct FetchChunksParams {
impl FetchChunks {
/// Instantiate a new strategy.
pub fn new(mut params: FetchChunksParams) -> Self {
// Shuffle the validators to make sure that we don't request chunks from the same
// validators over and over.
params.validators.make_contiguous().shuffle(&mut rand::thread_rng());

Self {
error_count: 0,
total_received_responses: 0,
Expand Down