@@ -236,7 +236,10 @@ pub fn run<CliConfig: crate::cli::CliConfig>(cmd_config: RunConfig) -> Result<()
let hwbench = (!cli.no_hardware_benchmarks)
.then_some(config.database.path().map(|database_path| {
let _ = std::fs::create_dir_all(database_path);
sc_sysinfo::gather_hwbench(Some(database_path))
sc_sysinfo::gather_hwbench(
Some(database_path),
&SUBSTRATE_REFERENCE_HARDWARE,
)
}))
.flatten();

@@ -853,7 +853,9 @@ where
fn warn_if_slow_hardware(hwbench: &sc_sysinfo::HwBench) {
// Polkadot para-chains should generally use these requirements to ensure that the relay-chain
// will not take longer than expected to import its blocks.
if let Err(err) = frame_benchmarking_cli::SUBSTRATE_REFERENCE_HARDWARE.check_hardware(hwbench) {
if let Err(err) =
frame_benchmarking_cli::SUBSTRATE_REFERENCE_HARDWARE.check_hardware(hwbench, false)
{
log::warn!(
"⚠️ The hardware does not meet the minimal requirements {} for role 'Authority' find out more at:\n\
https://wiki.polkadot.network/docs/maintain-guides-how-to-validate-polkadot#reference-hardware",
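
For orientation (not part of the diff): `check_hardware` now takes a second argument saying whether the node runs as a relay-chain authority. Requirements flagged `check_on_rc_authority` (introduced later in this PR) are skipped when that flag is `false`, which is why the parachain path above passes `false`. A minimal sketch of the new call shape, with a hypothetical wrapper name:

fn warn_if_below_reference(hwbench: &sc_sysinfo::HwBench, is_relay_chain_authority: bool) {
    // `CheckFailures` implements `Display`, so the failed metrics can be logged directly.
    if let Err(failures) = frame_benchmarking_cli::SUBSTRATE_REFERENCE_HARDWARE
        .check_hardware(hwbench, is_relay_chain_authority)
    {
        log::warn!("⚠️  Hardware is below the reference requirements: {}", failures);
    }
}
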
2 changes: 1 addition & 1 deletion polkadot/cli/src/command.rs
@@ -230,7 +230,7 @@ where
let hwbench = (!cli.run.no_hardware_benchmarks)
.then_some(config.database.path().map(|database_path| {
let _ = std::fs::create_dir_all(&database_path);
sc_sysinfo::gather_hwbench(Some(database_path))
sc_sysinfo::gather_hwbench(Some(database_path), &SUBSTRATE_REFERENCE_HARDWARE)
}))
.flatten();

31 changes: 25 additions & 6 deletions polkadot/node/service/src/lib.rs
@@ -762,6 +762,7 @@ pub fn new_full<
use polkadot_availability_recovery::FETCH_CHUNKS_THRESHOLD;
use polkadot_node_network_protocol::request_response::IncomingRequest;
use sc_network_sync::WarpSyncConfig;
use sc_sysinfo::Metric;

let is_offchain_indexing_enabled = config.offchain_worker.indexing_enabled;
let role = config.role.clone();
@@ -1079,13 +1080,31 @@

if let Some(hwbench) = hwbench {
sc_sysinfo::print_hwbench(&hwbench);
match SUBSTRATE_REFERENCE_HARDWARE.check_hardware(&hwbench) {
match SUBSTRATE_REFERENCE_HARDWARE.check_hardware(&hwbench, role.is_authority()) {
Err(err) if role.is_authority() => {
log::warn!(
"⚠️ The hardware does not meet the minimal requirements {} for role 'Authority' find out more at:\n\
https://wiki.polkadot.network/docs/maintain-guides-how-to-validate-polkadot#reference-hardware",
err
);
if err
.0
.iter()
.any(|failure| matches!(failure.metric, Metric::Blake2256Parallel { .. }))
{
log::warn!(
"⚠️ Starting January 2025 the hardware will fail the minimal physical CPU cores requirements {} for role 'Authority',\n\
find out more when this will become mandatory at:\n\
https://wiki.polkadot.network/docs/maintain-guides-how-to-validate-polkadot#reference-hardware",
err
);
}
if err
.0
.iter()
.any(|failure| !matches!(failure.metric, Metric::Blake2256Parallel { .. }))
{
log::warn!(
"⚠️ The hardware does not meet the minimal requirements {} for role 'Authority' find out more at:\n\
https://wiki.polkadot.network/docs/maintain-guides-how-to-validate-polkadot#reference-hardware",
err
);
}
},
_ => {},
}
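
The two `any` scans above give operators separate warnings: one for the parallel BLAKE2-256 check that only becomes mandatory in January 2025, and one for the requirements that are already enforced today. As an illustrative sketch only (reusing the `err` binding and the `Metric` import from this hunk, not code proposed by the PR), the same split can be expressed with a single partition:

let (parallel_failures, enforced_failures): (Vec<_>, Vec<_>) = err
    .0
    .iter()
    .partition(|failure| matches!(failure.metric, Metric::Blake2256Parallel { .. }));
// `parallel_failures`: warn about the upcoming January 2025 core-count requirement.
// `enforced_failures`: warn that the currently enforced reference hardware minimums are not met.
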
4 changes: 2 additions & 2 deletions substrate/bin/node/cli/src/service.rs
@@ -416,7 +416,7 @@ pub fn new_full_base<N: NetworkBackend<Block, <Block as BlockT>::Hash>>(
let hwbench = (!disable_hardware_benchmarks)
.then_some(config.database.path().map(|database_path| {
let _ = std::fs::create_dir_all(&database_path);
sc_sysinfo::gather_hwbench(Some(database_path))
sc_sysinfo::gather_hwbench(Some(database_path), &SUBSTRATE_REFERENCE_HARDWARE)
}))
.flatten();

@@ -553,7 +553,7 @@ pub fn new_full_base<N: NetworkBackend<Block, <Block as BlockT>::Hash>>(

if let Some(hwbench) = hwbench {
sc_sysinfo::print_hwbench(&hwbench);
match SUBSTRATE_REFERENCE_HARDWARE.check_hardware(&hwbench) {
match SUBSTRATE_REFERENCE_HARDWARE.check_hardware(&hwbench, false) {
Err(err) if role.is_authority() => {
log::warn!(
"⚠️ The hardware does not meet the minimal requirements {} for role 'Authority'.",
19 changes: 14 additions & 5 deletions substrate/client/sysinfo/src/lib.rs
@@ -27,10 +27,10 @@ mod sysinfo;
mod sysinfo_linux;

pub use sysinfo::{
benchmark_cpu, benchmark_disk_random_writes, benchmark_disk_sequential_writes,
benchmark_memory, benchmark_sr25519_verify, gather_hwbench, gather_sysinfo,
serialize_throughput, serialize_throughput_option, Metric, Requirement, Requirements,
Throughput,
benchmark_cpu, benchmark_cpu_parallelism, benchmark_disk_random_writes,
benchmark_disk_sequential_writes, benchmark_memory, benchmark_sr25519_verify, gather_hwbench,
gather_sysinfo, serialize_throughput, serialize_throughput_option, Metric, Requirement,
Requirements, Throughput,
};

/// The operating system part of the current target triplet.
@@ -48,6 +48,10 @@ pub struct HwBench {
/// The CPU speed, as measured in how many MB/s it can hash using the BLAKE2b-256 hash.
#[serde(serialize_with = "serialize_throughput")]
pub cpu_hashrate_score: Throughput,
/// The parallel CPU speed, as measured in how many MB/s it can hash in parallel using the
/// BLAKE2b-256 hash.
#[serde(serialize_with = "serialize_throughput")]
pub parallel_cpu_hashrate_score: Throughput,
/// Memory bandwidth in MB/s, calculated by measuring the throughput of `memcpy`.
#[serde(serialize_with = "serialize_throughput")]
pub memory_memcpy_score: Throughput,
@@ -65,6 +69,7 @@ pub struct HwBench {
pub disk_random_write_score: Option<Throughput>,
}

#[derive(Copy, Clone, Debug)]
/// Limit the execution time of a benchmark.
pub enum ExecutionLimit {
/// Limit by the maximal duration.
@@ -132,7 +137,11 @@ pub fn print_sysinfo(sysinfo: &sc_telemetry::SysInfo) {

/// Prints out the results of the hardware benchmarks in the logs.
pub fn print_hwbench(hwbench: &HwBench) {
log::info!("🏁 CPU score: {}", hwbench.cpu_hashrate_score);
log::info!(
"🏁 CPU single core score: {}, parallelism score: {}",
hwbench.cpu_hashrate_score,
hwbench.parallel_cpu_hashrate_score,
);
log::info!("🏁 Memory score: {}", hwbench.memory_memcpy_score);

if let Some(score) = hwbench.disk_sequential_write_score {
115 changes: 97 additions & 18 deletions substrate/client/sysinfo/src/sysinfo.rs
@@ -22,16 +22,17 @@ use sc_telemetry::SysInfo;
use sp_core::{sr25519, Pair};
use sp_io::crypto::sr25519_verify;

use core::f64;
use derive_more::From;
use rand::{seq::SliceRandom, Rng, RngCore};
use serde::{de::Visitor, Deserialize, Deserializer, Serialize, Serializer};
use std::{
fmt,
fmt::{Display, Formatter},
fmt::{self, Display, Formatter},
fs::File,
io::{Seek, SeekFrom, Write},
ops::{Deref, DerefMut},
path::{Path, PathBuf},
sync::{Arc, Barrier},
time::{Duration, Instant},
};

@@ -42,6 +43,8 @@ pub enum Metric {
Sr25519Verify,
/// Blake2-256 hashing algorithm.
Blake2256,
/// Blake2-256 hashing algorithm executed in parallel
Blake2256Parallel { num_cores: usize },
/// Copying data in RAM.
MemCopy,
/// Disk sequential write.
@@ -85,7 +88,7 @@ impl Metric {
/// The category of the metric.
pub fn category(&self) -> &'static str {
match self {
Self::Sr25519Verify | Self::Blake2256 => "CPU",
Self::Sr25519Verify | Self::Blake2256 | Self::Blake2256Parallel { .. } => "CPU",
Self::MemCopy => "Memory",
Self::DiskSeqWrite | Self::DiskRndWrite => "Disk",
}
@@ -96,11 +99,21 @@
match self {
Self::Sr25519Verify => "SR25519-Verify",
Self::Blake2256 => "BLAKE2-256",
Self::Blake2256Parallel { num_cores } =>
format!("BLAKE2-256-Parallel-{}", num_cores).leak(),
Self::MemCopy => "Copy",
Self::DiskSeqWrite => "Seq Write",
Self::DiskRndWrite => "Rnd Write",
}
}

/// The number of cores used in the parallel BLAKE2-256 hashing.
pub fn num_cores(&self) -> Option<usize> {
match self {
Self::Blake2256Parallel { num_cores } => Some(*num_cores),
_ => None,
}
}
}

/// The unit in which the [`Throughput`] (bytes per second) is denoted.
@@ -253,6 +266,10 @@ pub struct Requirement {
deserialize_with = "deserialize_throughput"
)]
pub minimum: Throughput,
/// Check this requirement only for relay chain authority nodes.
#[serde(default)]
#[serde(skip_serializing_if = "core::ops::Not::not")]
pub check_on_rc_authority: bool,
}

#[inline(always)]
@@ -343,8 +360,18 @@ fn clobber_value<T>(input: &mut T) {
pub const DEFAULT_CPU_EXECUTION_LIMIT: ExecutionLimit =
ExecutionLimit::Both { max_iterations: 4 * 1024, max_duration: Duration::from_millis(100) };

// This benchmarks the CPU speed as measured by calculating BLAKE2b-256 hashes, in bytes per second.
// This benchmarks the single core CPU speed as measured by calculating BLAKE2b-256 hashes, in bytes
// per second.
pub fn benchmark_cpu(limit: ExecutionLimit) -> Throughput {
benchmark_cpu_parallelism(limit, 1)
}

// This benchmarks the entire CPU speed as measured by calculating BLAKE2b-256 hashes, in bytes per
// second. It spawns multiple threads to measure the throughput of the entire CPU and averages the
// score obtained by each thread. If we have at least `refhw_num_cores` available then the
// average throughput should be relatively close to the single core performance as measured by
// calling this function with refhw_num_cores equal to 1.
pub fn benchmark_cpu_parallelism(limit: ExecutionLimit, refhw_num_cores: usize) -> Throughput {
// In general the results of this benchmark are somewhat sensitive to how much
// data we hash at the time. The smaller this is the *less* B/s we can hash,
// the bigger this is the *more* B/s we can hash, up until a certain point
@@ -359,20 +386,38 @@ pub fn benchmark_cpu(limit: ExecutionLimit) -> Throughput {
// but without hitting its theoretical maximum speed.
const SIZE: usize = 32 * 1024;

let mut buffer = Vec::new();
buffer.resize(SIZE, 0x66);
let mut hash = Default::default();
let ready_to_run_benchmark = Arc::new(Barrier::new(refhw_num_cores));
let mut benchmark_threads = Vec::new();

let run = || -> Result<(), ()> {
clobber_slice(&mut buffer);
hash = sp_crypto_hashing::blake2_256(&buffer);
clobber_slice(&mut hash);
// Spawn a thread for each expected core and average the throughput for each of them.
for _ in 0..refhw_num_cores {
let ready_to_run_benchmark = ready_to_run_benchmark.clone();

Ok(())
};
let handle = std::thread::spawn(move || {
let mut buffer = Vec::new();
buffer.resize(SIZE, 0x66);
let mut hash = Default::default();

benchmark("CPU score", SIZE, limit.max_iterations(), limit.max_duration(), run)
.expect("benchmark cannot fail; qed")
let run = || -> Result<(), ()> {
clobber_slice(&mut buffer);
hash = sp_crypto_hashing::blake2_256(&buffer);
clobber_slice(&mut hash);

Ok(())
};
ready_to_run_benchmark.wait();
benchmark("CPU score", SIZE, limit.max_iterations(), limit.max_duration(), run)
.expect("benchmark cannot fail; qed")
});
benchmark_threads.push(handle);
}

let average_score = benchmark_threads
.into_iter()
.map(|thread| thread.join().map(|throughput| throughput.as_kibs()).unwrap_or(0.0))
.sum::<f64>() /
refhw_num_cores as f64;
Throughput::from_kibs(average_score)
}

/// A default [`ExecutionLimit`] that can be used to call [`benchmark_memory`].
@@ -624,10 +669,20 @@ pub fn benchmark_sr25519_verify(limit: ExecutionLimit) -> Throughput {
/// Optionally accepts a path to a `scratch_directory` to use to benchmark the
/// disk. Also accepts the `requirements` for the hardware benchmark and a
/// boolean to specify if the node is an authority.
pub fn gather_hwbench(scratch_directory: Option<&Path>) -> HwBench {
pub fn gather_hwbench(scratch_directory: Option<&Path>, requirements: &Requirements) -> HwBench {
let parallel_num_cores = requirements
.0
.iter()
.filter_map(|requirement| requirement.metric.num_cores())
.next()
.unwrap_or(1);
#[allow(unused_mut)]
let mut hwbench = HwBench {
cpu_hashrate_score: benchmark_cpu(DEFAULT_CPU_EXECUTION_LIMIT),
parallel_cpu_hashrate_score: benchmark_cpu_parallelism(
DEFAULT_CPU_EXECUTION_LIMIT,
parallel_num_cores,
),
memory_memcpy_score: benchmark_memory(DEFAULT_MEMORY_EXECUTION_LIMIT),
disk_sequential_write_score: None,
disk_random_write_score: None,
@@ -659,9 +714,17 @@ pub fn gather_hwbench(scratch_directory: Option<&Path>) -> HwBench {

impl Requirements {
/// Whether the hardware requirements are met by the provided benchmark results.
pub fn check_hardware(&self, hwbench: &HwBench) -> Result<(), CheckFailures> {
pub fn check_hardware(
&self,
hwbench: &HwBench,
is_rc_authority: bool,
) -> Result<(), CheckFailures> {
let mut failures = Vec::new();
for requirement in self.0.iter() {
if requirement.check_on_rc_authority && !is_rc_authority {
continue
}

match requirement.metric {
Metric::Blake2256 =>
if requirement.minimum > hwbench.cpu_hashrate_score {
@@ -671,6 +734,14 @@ impl Requirements {
found: hwbench.cpu_hashrate_score,
});
},
Metric::Blake2256Parallel { .. } =>
if requirement.minimum > hwbench.parallel_cpu_hashrate_score {
failures.push(CheckFailure {
metric: requirement.metric,
expected: requirement.minimum,
found: hwbench.parallel_cpu_hashrate_score,
});
},
Metric::MemCopy =>
if requirement.minimum > hwbench.memory_memcpy_score {
failures.push(CheckFailure {
@@ -732,6 +803,13 @@ mod tests {
assert!(benchmark_cpu(DEFAULT_CPU_EXECUTION_LIMIT) > Throughput::from_mibs(0.0));
}

#[test]
fn test_benchmark_parallel_cpu() {
assert!(
benchmark_cpu_parallelism(DEFAULT_CPU_EXECUTION_LIMIT, 8) > Throughput::from_mibs(0.0)
);
}

#[test]
fn test_benchmark_memory() {
assert!(benchmark_memory(DEFAULT_MEMORY_EXECUTION_LIMIT) > Throughput::from_mibs(0.0));
Expand Down Expand Up @@ -781,13 +859,14 @@ mod tests {
fn hwbench_serialize_works() {
let hwbench = HwBench {
cpu_hashrate_score: Throughput::from_gibs(1.32),
parallel_cpu_hashrate_score: Throughput::from_gibs(1.32),
memory_memcpy_score: Throughput::from_kibs(9342.432),
disk_sequential_write_score: Some(Throughput::from_kibs(4332.12)),
disk_random_write_score: None,
};

let serialized = serde_json::to_string(&hwbench).unwrap();
// Throughput from all of the benchmarks should be converted to MiBs.
assert_eq!(serialized, "{\"cpu_hashrate_score\":1351,\"memory_memcpy_score\":9,\"disk_sequential_write_score\":4}");
assert_eq!(serialized, "{\"cpu_hashrate_score\":1351,\"parallel_cpu_hashrate_score\":1351,\"memory_memcpy_score\":9,\"disk_sequential_write_score\":4}");
}
}
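
To tie the pieces together, here is a minimal end-to-end sketch (not part of this diff) using the items re-exported from `sc_sysinfo` above. It assumes `Requirements` keeps a public tuple field and that `Throughput::from_gibs` stays public as used in the tests; the concrete minimums below are placeholders, not the real reference values.

use sc_sysinfo::{gather_hwbench, Metric, Requirement, Requirements, Throughput};

fn main() {
    // Placeholder requirements: the single-core BLAKE2-256 minimum applies to every node,
    // while the 8-core parallel minimum is only checked for relay-chain authorities.
    let requirements = Requirements(vec![
        Requirement {
            metric: Metric::Blake2256,
            minimum: Throughput::from_gibs(1.0),
            check_on_rc_authority: false,
        },
        Requirement {
            metric: Metric::Blake2256Parallel { num_cores: 8 },
            minimum: Throughput::from_gibs(1.0),
            check_on_rc_authority: true,
        },
    ]);

    // `gather_hwbench` now takes the requirements so it knows how many threads the parallel
    // hashing benchmark should spawn; passing `None` skips the disk benchmarks. The parallel
    // score is the per-thread average, so it is directly comparable to the single-core score.
    let hwbench = gather_hwbench(None, &requirements);

    // The boolean says whether this node is a relay-chain authority; with `false`, every
    // requirement flagged `check_on_rc_authority` is skipped entirely.
    if let Err(failures) = requirements.check_hardware(&hwbench, true) {
        println!("Hardware below the configured minimums: {}", failures);
    }
}
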