Skip to content
This repository was archived by the owner on Nov 15, 2023. It is now read-only.

Commit c10f061

Browse files
authored
Treat non-deterministic prep errors as internal errors (#4364)
Closes #4293. This PR changes how we treat a certain subset of PVF preparation errors. Specifically, only the deterministic errors are now treated as invalid candidates. That is, the errors that are easily attributable to either the PVF contents or the wasmtime code, but not e.g. I/O errors that could be triggered by the OS (insufficient memory, disk failure, too much load, etc). The latter are treated as internal errors and thus do not trigger disputes.
1 parent bd69f54 commit c10f061

File tree

3 files changed

+34
-12
lines changed

3 files changed

+34
-12
lines changed

node/core/candidate-validation/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,8 @@ async fn validate_candidate_exhaustive(
459459
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(
460460
"ambiguous worker death".to_string(),
461461
))),
462+
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::PrepareError(e))) =>
463+
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(e))),
462464

463465
Ok(res) =>
464466
if res.head_data.hash() != descriptor.para_head {

node/core/pvf/src/error.rs

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ pub enum ValidationError {
4848
/// of the candidate [`polkadot_parachain::primitives::ValidationParams`] and the PVF.
4949
#[derive(Debug, Clone)]
5050
pub enum InvalidCandidate {
51-
/// The failure is reported by the worker. The string contains the error message.
52-
///
53-
/// This also includes the errors reported by the preparation pipeline.
51+
/// PVF preparation ended up with a deterministic error.
52+
PrepareError(String),
53+
/// The failure is reported by the execution worker. The string contains the error message.
5454
WorkerReportedError(String),
5555
/// The worker has died during validation of a candidate. That may fall in one of the following
5656
/// categories, which we cannot distinguish programmatically:
@@ -78,13 +78,33 @@ pub enum InvalidCandidate {
7878

7979
impl From<PrepareError> for ValidationError {
8080
fn from(error: PrepareError) -> Self {
81-
let error_str = match error {
82-
PrepareError::Prevalidation(err) => format!("prevalidation: {}", err),
83-
PrepareError::Preparation(err) => format!("preparation: {}", err),
84-
PrepareError::Panic(err) => format!("panic: {}", err),
85-
PrepareError::TimedOut => "preparation timeout".to_owned(),
86-
PrepareError::DidNotMakeIt => "communication error".to_owned(),
87-
};
88-
ValidationError::InvalidCandidate(InvalidCandidate::WorkerReportedError(error_str))
81+
// Here we need to classify the errors into two kinds: deterministic and non-deterministic.
82+
//
83+
// Non-deterministic errors can happen spuriously. Typically, they occur due to resource
84+
// starvation, e.g. under heavy load or memory pressure. Those errors are typically transient
85+
// but may persist, e.g. if the node is run on an overwhelmingly underpowered machine.
86+
//
87+
// Deterministic errors should trigger reliably. Those errors depend on the PVF itself and
88+
// the sc-executor/wasmtime logic.
89+
//
90+
// For now, at least until the PVF pre-checking lands, the deterministic errors will be
91+
// treated as `InvalidCandidate`. Should those occur they could potentially trigger disputes.
92+
//
93+
// All non-deterministic errors are qualified as `InternalError`s and will not trigger
94+
// disputes.
95+
match error {
96+
PrepareError::Prevalidation(err) => ValidationError::InvalidCandidate(
97+
InvalidCandidate::PrepareError(format!("prevalidation: {}", err)),
98+
),
99+
PrepareError::Preparation(err) => ValidationError::InvalidCandidate(
100+
InvalidCandidate::PrepareError(format!("preparation: {}", err)),
101+
),
102+
PrepareError::Panic(err) => ValidationError::InvalidCandidate(
103+
InvalidCandidate::PrepareError(format!("panic: {}", err)),
104+
),
105+
PrepareError::TimedOut => ValidationError::InternalError("prepare: timeout".to_owned()),
106+
PrepareError::DidNotMakeIt =>
107+
ValidationError::InternalError("prepare: did not make it".to_owned()),
108+
}
89109
}
90110
}

node/core/pvf/src/host.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1156,7 +1156,7 @@ mod tests {
11561156
assert_matches!(result_rx.now_or_never().unwrap().unwrap(), Err(PrepareError::TimedOut));
11571157
assert_matches!(
11581158
result_rx_execute.now_or_never().unwrap().unwrap(),
1159-
Err(ValidationError::InvalidCandidate(InvalidCandidate::WorkerReportedError(_)))
1159+
Err(ValidationError::InternalError(_))
11601160
);
11611161

11621162
// Reversed case: first send multiple precheck requests, then ask for an execution.

0 commit comments

Comments
 (0)