Skip to content
This repository was archived by the owner on Nov 15, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
pvf: ensure enough stack space
  • Loading branch information
pepyakin committed Jun 21, 2022
commit 357bb5fdf89c0dac3cb49bf5dc194461346e463d
7 changes: 4 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions node/core/pvf/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ gum = { package = "tracing-gum", path = "../../gum" }
pin-project = "1.0.9"
rand = "0.8.5"
tempfile = "3.3.0"
crossbeam-utils = "0.8.9"
parity-scale-codec = { version = "3.1.2", default-features = false, features = ["derive"] }
polkadot-parachain = { path = "../../../parachain" }
polkadot-core-primitives = { path = "../../../core-primitives" }
Expand Down
15 changes: 15 additions & 0 deletions node/core/pvf/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.

use parity_scale_codec::{Decode, Encode};
use std::any::Any;

/// Result of PVF preparation performed by the validation host.
pub type PrepareResult = Result<(), PrepareError>;
Expand Down Expand Up @@ -108,3 +109,17 @@ impl From<PrepareError> for ValidationError {
}
}
}

/// Attempt to convert an opaque panic payload to a string.
///
/// This is a best effort, and is not guaranteed to provide the most accurate value.
pub(crate) fn stringify_panic_payload(payload: Box<dyn Any + Send + 'static>) -> String {
match payload.downcast::<&'static str>() {
Ok(msg) => msg.to_string(),
Err(payload) => match payload.downcast::<String>() {
Ok(msg) => *msg,
// At least we tried...
Err(_) => "unkown panic payload".to_string(),
},
}
}
73 changes: 72 additions & 1 deletion node/core/pvf/src/executor_intf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ use std::{
const DEFAULT_HEAP_PAGES_ESTIMATE: u64 = 32;
const EXTRA_HEAP_PAGES: u64 = 2048;

/// The number of bytes devoted for the stack during wasm execution of a PVF.
const NATIVE_STACK_MAX: u32 = 256 * 1024 * 1024;

const CONFIG: Config = Config {
allow_missing_func_imports: true,
cache_path: None,
Expand All @@ -69,7 +72,7 @@ const CONFIG: Config = Config {
// the stack limit set by the wasmtime.
deterministic_stack_limit: Some(DeterministicStackLimit {
logical_max: 65536,
native_stack_max: 256 * 1024 * 1024,
native_stack_max: NATIVE_STACK_MAX,
}),
canonicalize_nans: true,
// Rationale for turning the multi-threaded compilation off is to make the preparation time
Expand Down Expand Up @@ -115,6 +118,74 @@ pub unsafe fn execute(
compiled_artifact_path: &Path,
params: &[u8],
spawner: impl sp_core::traits::SpawnNamed + 'static,
) -> Result<Vec<u8>, String> {
// Wasmtime powers the Substrate Executor. It compiles the wasm bytecode into native code.
// That native code does not create any stacks and just reuses the stack of the thread that
// wasmtime was invoked from.
//
// Also, we configure the executor to provide the deterministic stack and that requires
// supplying the amount of the native stack space that wasm is allowed to use. This is
// realized by supplying the limit into `wasmtime::Config::max_wasm_stack`.
//
// There are quirks to that configuration knob:
//
// 1. It only limits the amount of stack space consumed by wasm but does not ensure nor check
// that the stack space is actually available.
//
// That means, if the calling thread has 1 MiB of stack space left and the wasm code consumes
// more, then the wasmtime limit will **not** trigger. Instead, the wasm code will hit the
// guard page and the Rust stack overflow handler will be triggered. That leads to an
// **abort**.
//
// 2. It cannot and does not limit the stack space consumed by Rust code.
//
// Meaning that if the wasm code leaves no stack space for Rust code, then the Rust code
// and that will abort the process as well.
//
// Typically on Linux the main thread gets the stack size specified by the `ulimit` and
// typically it's configured to 8 MiB. Rust's spawned threads are 2 MiB. OTOH, the
// NATIVE_STACK_MAX is set to 256 MiB. Not nearly enough.
//
// Hence we need to increase it.
//
// The simplest way to fix that is to spawn a new thread with the desired stack limit. One may
// think that it may be too expensive, but I disagree. The cost of creating a new thread is
// measured in microseconds, negligable compared to other costs.
//
// The reasoning why we pick this particular size is:
//
// The default Rust thread stack limit 2 MiB + 256 MiB wasm stack.
let thread_stack_size = 2 * 1024 * 1024 + NATIVE_STACK_MAX as usize;

crossbeam_utils::thread::scope(|s| {
s.builder()
.name("wasm executor".to_string())
.stack_size(thread_stack_size)
.spawn(move |_| do_execute(compiled_artifact_path, params, spawner))
.map_err(|e| format!("failed to spawn executor thread: {}", e))?
.join()
.map_err(|e| {
format!(
"the executor thread panicked: {}",
crate::error::stringify_panic_payload(e)
)
})?
.map_err(|e| format!("executor failed: {}", e))
})
.map_err(|e| {
// despite the fact that we join the thread above and handle this, we still got an error.
// Not sure how this could've happened, but we just handle by returning an error.
format!(
"the executor thread panicked regardless of joining: {}",
crate::error::stringify_panic_payload(e)
)
})?
}

unsafe fn do_execute(
compiled_artifact_path: &Path,
params: &[u8],
spawner: impl sp_core::traits::SpawnNamed + 'static,
) -> Result<Vec<u8>, sc_executor_common::error::Error> {
let mut extensions = sp_externalities::Extensions::new();

Expand Down
20 changes: 4 additions & 16 deletions node/core/pvf/src/prepare/worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use async_std::{
};
use parity_scale_codec::{Decode, Encode};
use sp_core::hexdisplay::HexDisplay;
use std::{any::Any, panic, sync::Arc, time::Duration};
use std::{panic, sync::Arc, time::Duration};

/// The time period after which the preparation worker is considered unresponsive and will be killed.
// NOTE: If you change this make sure to fix the buckets of `pvf_preparation_time` metric.
Expand Down Expand Up @@ -294,20 +294,8 @@ fn prepare_artifact(code: &[u8]) -> Result<CompiledArtifact, PrepareError> {
Err(err) => Err(PrepareError::Preparation(format!("{:?}", err))),
}
})
.map_err(|panic_payload| PrepareError::Panic(stringify_panic_payload(panic_payload)))
.map_err(|panic_payload| {
PrepareError::Panic(crate::error::stringify_panic_payload(panic_payload))
})
.and_then(|inner_result| inner_result)
}

/// Attempt to convert an opaque panic payload to a string.
///
/// This is a best effort, and is not guaranteed to provide the most accurate value.
fn stringify_panic_payload(payload: Box<dyn Any + Send + 'static>) -> String {
match payload.downcast::<&'static str>() {
Ok(msg) => msg.to_string(),
Err(payload) => match payload.downcast::<String>() {
Ok(msg) => *msg,
// At least we tried...
Err(_) => "unkown panic payload".to_string(),
},
}
}