Skip to content
This repository was archived by the owner on Nov 15, 2023. It is now read-only.

Commit f0943e6

Browse files
committed
pvf host: store only compiled artifacts on disk
1 parent c406f7b commit f0943e6

File tree

7 files changed

+162
-101
lines changed

7 files changed

+162
-101
lines changed

node/core/pvf/src/artifacts.rs

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,33 +23,44 @@ use std::{
2323
time::{Duration, SystemTime},
2424
};
2525

26-
/// A final product of preparation process. Contains either a ready to run compiled artifact or
27-
/// a description what went wrong.
28-
#[derive(Encode, Decode)]
29-
pub enum Artifact {
26+
/// An error that occurred during the prepare part of the PVF pipeline.
27+
#[derive(Debug, Clone, Encode, Decode)]
28+
pub enum PrepareError {
3029
/// During the prevalidation stage of preparation an issue was found with the PVF.
31-
PrevalidationErr(String),
30+
Prevalidation(String),
3231
/// Compilation failed for the given PVF.
33-
PreparationErr(String),
32+
Preparation(String),
3433
/// This state indicates that the process assigned to prepare the artifact wasn't responsible
3534
/// or were killed. This state is reported by the validation host (not by the worker).
36-
DidntMakeIt,
37-
/// The PVF passed all the checks and is ready for execution.
38-
Compiled { compiled_artifact: Vec<u8> },
35+
DidNotMakeIt,
3936
}
4037

41-
impl Artifact {
42-
/// Serializes this struct into a byte buffer.
43-
pub fn serialize(&self) -> Vec<u8> {
44-
self.encode()
38+
/// A wrapper for the compiled PVF code.
39+
#[derive(Encode, Decode)]
40+
pub struct CompiledArtifact(Vec<u8>);
41+
42+
impl CompiledArtifact {
43+
pub fn new(code: Vec<u8>) -> Self {
44+
Self(code)
4545
}
46+
}
4647

47-
/// Deserialize the given byte buffer to an artifact.
48-
pub fn deserialize(mut bytes: &[u8]) -> Result<Self, String> {
49-
Artifact::decode(&mut bytes).map_err(|e| format!("{:?}", e))
48+
impl AsRef<[u8]> for CompiledArtifact {
49+
fn as_ref(&self) -> &[u8] {
50+
self.0.as_slice()
5051
}
5152
}
5253

54+
/// A final product of preparation process. Contains either a ready to run compiled artifact or
55+
/// a description what went wrong.
56+
#[derive(Encode, Decode)]
57+
pub enum Artifact {
58+
/// An error occurred during the prepare part of the PVF pipeline.
59+
Error(PrepareError),
60+
/// The PVF passed all the checks and is ready for execution.
61+
Compiled(CompiledArtifact),
62+
}
63+
5364
/// Identifier of an artifact. Right now it only encodes a code hash of the PVF. But if we get to
5465
/// multiple engine implementations the artifact ID should include the engine type as well.
5566
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -117,6 +128,9 @@ pub enum ArtifactState {
117128
},
118129
/// A task to prepare this artifact is scheduled.
119130
Preparing,
131+
/// The code couldn't be compiled due to an error. Such artifacts
132+
/// never reach the executor and stay in the host's memory.
133+
FailedToProcess(PrepareError),
120134
}
121135

122136
/// A container of all known artifact ids and their states.

node/core/pvf/src/error.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
// You should have received a copy of the GNU General Public License
1515
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
1616

17+
use crate::artifacts::PrepareError;
18+
1719
/// A error raised during validation of the candidate.
1820
#[derive(Debug, Clone)]
1921
pub enum ValidationError {
@@ -54,3 +56,14 @@ pub enum InvalidCandidate {
5456
/// PVF execution (compilation is not included) took more time than was allotted.
5557
HardTimeout,
5658
}
59+
60+
impl From<PrepareError> for ValidationError {
61+
fn from(error: PrepareError) -> Self {
62+
let error_str = match error {
63+
PrepareError::Prevalidation(err) => err,
64+
PrepareError::Preparation(err) => err,
65+
PrepareError::DidNotMakeIt => "preparation timeout".to_owned(),
66+
};
67+
ValidationError::InvalidCandidate(InvalidCandidate::WorkerReportedError(error_str))
68+
}
69+
}

node/core/pvf/src/execute/worker.rs

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
1616

1717
use crate::{
18-
artifacts::{Artifact, ArtifactPathId},
18+
artifacts::{ArtifactPathId, CompiledArtifact},
1919
executor_intf::TaskExecutor,
2020
worker_common::{
2121
bytes_to_path, framed_recv, framed_send, path_to_bytes, spawn_with_program_path,
@@ -217,18 +217,12 @@ async fn validate_using_artifact(
217217
Ok(b) => b,
218218
};
219219

220-
let artifact = match Artifact::deserialize(&artifact_bytes) {
220+
let artifact = match CompiledArtifact::decode(&mut artifact_bytes.as_slice()) {
221221
Err(e) => return Response::InternalError(format!("artifact deserialization: {:?}", e)),
222222
Ok(a) => a,
223223
};
224224

225-
let compiled_artifact = match &artifact {
226-
Artifact::PrevalidationErr(msg) => return Response::format_invalid("prevalidation", msg),
227-
Artifact::PreparationErr(msg) => return Response::format_invalid("preparation", msg),
228-
Artifact::DidntMakeIt => return Response::format_invalid("preparation timeout", ""),
229-
230-
Artifact::Compiled { compiled_artifact } => compiled_artifact,
231-
};
225+
let compiled_artifact = artifact.as_ref();
232226

233227
let validation_started_at = Instant::now();
234228
let descriptor_bytes = match unsafe {

node/core/pvf/src/host.rs

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -327,8 +327,7 @@ async fn run(
327327
.await);
328328
},
329329
from_prepare_queue = from_prepare_queue_rx.next() => {
330-
let prepare::FromQueue::Prepared(artifact_id)
331-
= break_if_fatal!(from_prepare_queue.ok_or(Fatal));
330+
let from_queue = break_if_fatal!(from_prepare_queue.ok_or(Fatal));
332331

333332
// Note that preparation always succeeds.
334333
//
@@ -344,7 +343,7 @@ async fn run(
344343
&mut artifacts,
345344
&mut to_execute_queue_tx,
346345
&mut awaiting_prepare,
347-
artifact_id,
346+
from_queue,
348347
).await);
349348
},
350349
}
@@ -419,6 +418,9 @@ async fn handle_execute_pvf(
419418

420419
awaiting_prepare.add(artifact_id, params, result_tx);
421420
},
421+
ArtifactState::FailedToProcess(error) => {
422+
let _ = result_tx.send(Err(ValidationError::from(error.clone())));
423+
},
422424
}
423425
} else {
424426
// Artifact is unknown: register it and enqueue a job with the corresponding priority and
@@ -450,6 +452,7 @@ async fn handle_heads_up(
450452
// Already preparing. We don't need to send a priority amend either because
451453
// it can't get any lower than the background.
452454
},
455+
ArtifactState::FailedToProcess(_) => {},
453456
}
454457
} else {
455458
// The artifact is unknown: register it and put a background job into the prepare queue.
@@ -471,8 +474,10 @@ async fn handle_prepare_done(
471474
artifacts: &mut Artifacts,
472475
execute_queue: &mut mpsc::Sender<execute::ToQueue>,
473476
awaiting_prepare: &mut AwaitingPrepare,
474-
artifact_id: ArtifactId,
477+
from_queue: prepare::FromQueue,
475478
) -> Result<(), Fatal> {
479+
let prepare::FromQueue { artifact_id, result } = from_queue;
480+
476481
// Make some sanity checks and extract the current state.
477482
let state = match artifacts.artifact_state_mut(&artifact_id) {
478483
None => {
@@ -493,9 +498,21 @@ async fn handle_prepare_done(
493498
never!("the artifact is already prepared: {:?}", artifact_id);
494499
return Ok(())
495500
},
501+
Some(ArtifactState::FailedToProcess(_)) => {
502+
// The reasoning is similar to the above, the artifact cannot be
503+
// processed at this point.
504+
never!("the artifact is already processed unsuccessfully: {:?}", artifact_id);
505+
return Ok(())
506+
},
496507
Some(state @ ArtifactState::Preparing) => state,
497508
};
498509

510+
// Don't send failed artifacts to the execution's queue.
511+
if let Err(error) = result {
512+
*state = ArtifactState::FailedToProcess(error);
513+
return Ok(())
514+
}
515+
499516
// It's finally time to dispatch all the execution requests that were waiting for this artifact
500517
// to be prepared.
501518
let pending_requests = awaiting_prepare.take(&artifact_id);
@@ -895,7 +912,7 @@ mod tests {
895912
);
896913

897914
test.from_prepare_queue_tx
898-
.send(prepare::FromQueue::Prepared(artifact_id(1)))
915+
.send(prepare::FromQueue { artifact_id: artifact_id(1), result: Ok(()) })
899916
.await
900917
.unwrap();
901918
let result_tx_pvf_1_1 = assert_matches!(
@@ -908,7 +925,7 @@ mod tests {
908925
);
909926

910927
test.from_prepare_queue_tx
911-
.send(prepare::FromQueue::Prepared(artifact_id(2)))
928+
.send(prepare::FromQueue { artifact_id: artifact_id(2), result: Ok(()) })
912929
.await
913930
.unwrap();
914931
let result_tx_pvf_2 = assert_matches!(
@@ -957,7 +974,7 @@ mod tests {
957974
);
958975

959976
test.from_prepare_queue_tx
960-
.send(prepare::FromQueue::Prepared(artifact_id(1)))
977+
.send(prepare::FromQueue { artifact_id: artifact_id(1), result: Ok(()) })
961978
.await
962979
.unwrap();
963980

node/core/pvf/src/prepare/pool.rs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
use super::worker::{self, Outcome};
1818
use crate::{
19+
artifacts::PrepareError,
1920
metrics::Metrics,
2021
worker_common::{IdleWorker, WorkerHandle},
2122
LOG_TARGET,
@@ -80,7 +81,13 @@ pub enum FromPool {
8081

8182
/// The given worker either succeeded or failed the given job. Under any circumstances the
8283
/// artifact file has been written. The `bool` says whether the worker ripped.
83-
Concluded(Worker, bool),
84+
Concluded {
85+
worker: Worker,
86+
rip: bool,
87+
/// [`Ok`] indicates that compiled artifact is successfully stored on disk.
88+
/// Otherwise, an [error](PrepareError) is supplied.
89+
result: Result<(), PrepareError>,
90+
},
8491

8592
/// The given worker ceased to exist.
8693
Rip(Worker),
@@ -295,7 +302,7 @@ fn handle_mux(
295302
},
296303
PoolEvent::StartWork(worker, outcome) => {
297304
match outcome {
298-
Outcome::Concluded(idle) => {
305+
Outcome::Concluded { worker: idle, result } => {
299306
let data = match spawned.get_mut(worker) {
300307
None => {
301308
// Perhaps the worker was killed meanwhile and the result is no longer
@@ -310,7 +317,7 @@ fn handle_mux(
310317
let old = data.idle.replace(idle);
311318
assert_matches!(old, None, "attempt to overwrite an idle worker");
312319

313-
reply(from_pool, FromPool::Concluded(worker, false))?;
320+
reply(from_pool, FromPool::Concluded { worker, rip: false, result })?;
314321

315322
Ok(())
316323
},
@@ -321,9 +328,16 @@ fn handle_mux(
321328

322329
Ok(())
323330
},
324-
Outcome::DidntMakeIt => {
331+
Outcome::DidNotMakeIt => {
325332
if attempt_retire(metrics, spawned, worker) {
326-
reply(from_pool, FromPool::Concluded(worker, true))?;
333+
reply(
334+
from_pool,
335+
FromPool::Concluded {
336+
worker,
337+
rip: true,
338+
result: Err(PrepareError::DidNotMakeIt),
339+
},
340+
)?;
327341
}
328342

329343
Ok(())

node/core/pvf/src/prepare/queue.rs

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@
1717
//! A queue that handles requests for PVF preparation.
1818
1919
use super::pool::{self, Worker};
20-
use crate::{artifacts::ArtifactId, metrics::Metrics, Priority, Pvf, LOG_TARGET};
20+
use crate::{
21+
artifacts::{ArtifactId, PrepareError},
22+
metrics::Metrics,
23+
Priority, Pvf, LOG_TARGET,
24+
};
2125
use always_assert::{always, never};
2226
use async_std::path::PathBuf;
2327
use futures::{channel::mpsc, stream::StreamExt as _, Future, SinkExt};
@@ -37,9 +41,13 @@ pub enum ToQueue {
3741
}
3842

3943
/// A response from queue.
40-
#[derive(Debug, PartialEq, Eq)]
41-
pub enum FromQueue {
42-
Prepared(ArtifactId),
44+
#[derive(Debug)]
45+
pub struct FromQueue {
46+
/// Identifier of an artifact.
47+
pub(crate) artifact_id: ArtifactId,
48+
/// Outcome of the PVF processing. [`Ok`] indicates that compiled artifact
49+
/// is successfully stored on disk. Otherwise, an [error](PrepareError) is supplied.
50+
pub(crate) result: Result<(), PrepareError>,
4351
}
4452

4553
#[derive(Default)]
@@ -299,7 +307,8 @@ async fn handle_from_pool(queue: &mut Queue, from_pool: pool::FromPool) -> Resul
299307
use pool::FromPool::*;
300308
match from_pool {
301309
Spawned(worker) => handle_worker_spawned(queue, worker).await?,
302-
Concluded(worker, rip) => handle_worker_concluded(queue, worker, rip).await?,
310+
Concluded { worker, rip, result } =>
311+
handle_worker_concluded(queue, worker, rip, result).await?,
303312
Rip(worker) => handle_worker_rip(queue, worker).await?,
304313
}
305314
Ok(())
@@ -320,6 +329,7 @@ async fn handle_worker_concluded(
320329
queue: &mut Queue,
321330
worker: Worker,
322331
rip: bool,
332+
result: Result<(), PrepareError>,
323333
) -> Result<(), Fatal> {
324334
queue.metrics.prepare_concluded();
325335

@@ -377,7 +387,7 @@ async fn handle_worker_concluded(
377387
"prepare worker concluded",
378388
);
379389

380-
reply(&mut queue.from_queue_tx, FromQueue::Prepared(artifact_id))?;
390+
reply(&mut queue.from_queue_tx, FromQueue { artifact_id, result })?;
381391

382392
// Figure out what to do with the worker.
383393
if rip {
@@ -641,12 +651,9 @@ mod tests {
641651

642652
let w = test.workers.insert(());
643653
test.send_from_pool(pool::FromPool::Spawned(w));
644-
test.send_from_pool(pool::FromPool::Concluded(w, false));
654+
test.send_from_pool(pool::FromPool::Concluded { worker: w, rip: false, result: Ok(()) });
645655

646-
assert_eq!(
647-
test.poll_and_recv_from_queue().await,
648-
FromQueue::Prepared(pvf(1).as_artifact_id())
649-
);
656+
assert_eq!(test.poll_and_recv_from_queue().await.artifact_id, pvf(1).as_artifact_id());
650657
}
651658

652659
#[async_std::test]
@@ -671,7 +678,7 @@ mod tests {
671678
assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
672679
assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
673680

674-
test.send_from_pool(pool::FromPool::Concluded(w1, false));
681+
test.send_from_pool(pool::FromPool::Concluded { worker: w1, rip: false, result: Ok(()) });
675682

676683
assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
677684

@@ -704,7 +711,7 @@ mod tests {
704711
// That's a bit silly in this context, but in production there will be an entire pool up
705712
// to the `soft_capacity` of workers and it doesn't matter which one to cull. Either way,
706713
// we just check that edge case of an edge case works.
707-
test.send_from_pool(pool::FromPool::Concluded(w1, false));
714+
test.send_from_pool(pool::FromPool::Concluded { worker: w1, rip: false, result: Ok(()) });
708715
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Kill(w1));
709716
}
710717

@@ -749,15 +756,12 @@ mod tests {
749756
assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
750757

751758
// Conclude worker 1 and rip it.
752-
test.send_from_pool(pool::FromPool::Concluded(w1, true));
759+
test.send_from_pool(pool::FromPool::Concluded { worker: w1, rip: true, result: Ok(()) });
753760

754761
// Since there is still work, the queue requested one extra worker to spawn to handle the
755762
// remaining enqueued work items.
756763
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
757-
assert_eq!(
758-
test.poll_and_recv_from_queue().await,
759-
FromQueue::Prepared(pvf(1).as_artifact_id())
760-
);
764+
assert_eq!(test.poll_and_recv_from_queue().await.artifact_id, pvf(1).as_artifact_id());
761765
}
762766

763767
#[async_std::test]
@@ -773,7 +777,7 @@ mod tests {
773777

774778
assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
775779

776-
test.send_from_pool(pool::FromPool::Concluded(w1, true));
780+
test.send_from_pool(pool::FromPool::Concluded { worker: w1, rip: true, result: Ok(()) });
777781
test.poll_ensure_to_pool_is_empty().await;
778782
}
779783

0 commit comments

Comments
 (0)