From c4105ee308baf89b1d06333e5f6f5fb7090ac74d Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 17 Sep 2025 16:47:53 -0300 Subject: [PATCH 001/115] Made it so that it writes stts for default --- Cargo.lock | 2 ++ cmd/ethrex/Cargo.toml | 4 +-- crates/networking/p2p/Cargo.toml | 5 ++- crates/networking/p2p/peer_handler.rs | 28 +++++++-------- crates/networking/p2p/utils.rs | 49 +++++++++++++++++++++++++-- 5 files changed, 67 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 05b9142329e..acc0f3ba3fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4300,6 +4300,7 @@ dependencies = [ "aes", "async-trait", "bytes", + "cfg-if 1.0.3", "concat-kdf", "ctr", "ethereum-types 0.15.1", @@ -4318,6 +4319,7 @@ dependencies = [ "prometheus 0.14.0", "rand 0.8.5", "rayon", + "rocksdb", "secp256k1", "serde", "serde_json", diff --git a/cmd/ethrex/Cargo.toml b/cmd/ethrex/Cargo.toml index 1e094d30a8c..02ca0802c17 100644 --- a/cmd/ethrex/Cargo.toml +++ b/cmd/ethrex/Cargo.toml @@ -61,7 +61,7 @@ path = "./lib.rs" [features] debug = ["ethrex-vm/debug"] -default = ["libmdbx", "c-kzg", "rollup_storage_sql", "dev", "metrics"] +default = ["libmdbx", "c-kzg", "rollup_storage_sql", "dev", "metrics", "rocksdb"] dev = ["dep:ethrex-dev"] c-kzg = [ "ethrex-vm/c-kzg", @@ -71,7 +71,7 @@ c-kzg = [ ] metrics = ["ethrex-blockchain/metrics", "ethrex-l2/metrics"] libmdbx = ["ethrex-storage/libmdbx"] -rocksdb = ["ethrex-storage/rocksdb"] +rocksdb = ["ethrex-storage/rocksdb", "ethrex-p2p/rocksdb"] rollup_storage_sql = ["ethrex-storage-rollup/sql"] sync-test = ["ethrex-p2p/sync-test"] sp1 = ["ethrex-prover/sp1"] diff --git a/crates/networking/p2p/Cargo.toml b/crates/networking/p2p/Cargo.toml index f05a7b76a19..9390e7e3de6 100644 --- a/crates/networking/p2p/Cargo.toml +++ b/crates/networking/p2p/Cargo.toml @@ -30,6 +30,8 @@ spawned-concurrency.workspace = true sha2.workspace = true keccak-hash.workspace = true futures.workspace = true +cfg-if.workspace = true +rocksdb = { 
workspace = true, optional = true } prometheus = "0.14.0" tokio-stream = "0.1.17" @@ -53,9 +55,10 @@ hex-literal = "0.4.1" path = "./p2p.rs" [features] -default = ["c-kzg"] +default = ["c-kzg", "rocksdb"] c-kzg = ["ethrex-blockchain/c-kzg", "ethrex-common/c-kzg"] sync-test = [] +rocksdb = ["dep:rocksdb"] [lints.clippy] unwrap_used = "deny" diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index 42f252bbd1b..2b52932715c 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -37,8 +37,8 @@ use crate::{ snap::encodable_to_proof, sync::{AccountStorageRoots, BlockSyncState, block_is_stale, update_pivot}, utils::{ - SendMessageError, dump_to_file, get_account_state_snapshot_file, - get_account_storages_snapshot_file, + SendMessageError, dump_accounts_to_file, dump_storages_to_file, dump_to_file, + get_account_state_snapshot_file, get_account_storages_snapshot_file, }, }; use tracing::{debug, error, info, trace, warn}; @@ -780,8 +780,7 @@ impl PeerHandler { let account_state_chunk = current_account_hashes .into_iter() .zip(current_account_states) - .collect::>() - .encode_to_vec(); + .collect::>(); if !std::fs::exists(&account_state_snapshots_dir) .map_err(|_| PeerHandlerError::NoStateSnapshotsDir)? @@ -798,7 +797,7 @@ impl PeerHandler { chunk_file, ); // TODO: check the error type and handle it properly - let result = dump_to_file(path, account_state_chunk); + let result = dump_accounts_to_file(path, account_state_chunk); dump_account_result_sender_cloned .send(result) .await @@ -931,8 +930,7 @@ impl PeerHandler { let account_state_chunk = current_account_hashes .into_iter() .zip(current_account_states) - .collect::>() - .encode_to_vec(); + .collect::>(); if !std::fs::exists(&account_state_snapshots_dir) .map_err(|_| PeerHandlerError::NoStateSnapshotsDir)? 
@@ -942,7 +940,7 @@ impl PeerHandler { } let path = get_account_state_snapshot_file(account_state_snapshots_dir, chunk_file); - std::fs::write(path, account_state_chunk) + dump_accounts_to_file(path, account_state_chunk) .map_err(|_| PeerHandlerError::WriteStateSnapshotsDir(chunk_file))?; } @@ -1302,7 +1300,7 @@ impl PeerHandler { } // 2) request the chunks from peers - let mut all_account_storages = + let mut all_account_storages: Vec> = vec![vec![]; account_storage_roots.accounts_with_storage_root.len()]; // channel to send the tasks to the peers @@ -1336,8 +1334,7 @@ impl PeerHandler { .clone() .into_iter() .zip(current_account_storages) - .collect::>() - .encode_to_vec(); + .collect::>(); if !std::fs::exists(&account_storages_snapshots_dir) .map_err(|_| PeerHandlerError::NoStorageSnapshotsDir)? @@ -1363,7 +1360,7 @@ impl PeerHandler { account_storages_snapshots_dir_cloned, chunk_index, ); - dump_to_file(path, snapshot) + dump_storages_to_file(path, snapshot) }); chunk_index += 1; @@ -1567,8 +1564,7 @@ impl PeerHandler { let snapshot = current_account_hashes .into_iter() .zip(current_account_storages) - .collect::>() - .encode_to_vec(); + .collect::>(); if !std::fs::exists(&account_storages_snapshots_dir) .map_err(|_| PeerHandlerError::NoStorageSnapshotsDir)? 
@@ -1581,8 +1577,8 @@ impl PeerHandler { account_storages_snapshots_dir_cloned, chunk_index, ); - std::fs::write(path, snapshot) - .map_err(|_| PeerHandlerError::WriteStorageSnapshotsDir(chunk_index))?; + dump_storages_to_file(path, snapshot) + .map_err(|_| PeerHandlerError::WriteStateSnapshotsDir(chunk_index))?; } disk_joinset .join_all() diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index 9da82d3870d..47510b9e400 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -3,8 +3,8 @@ use std::{ time::{Duration, SystemTime, UNIX_EPOCH}, }; -use ethrex_common::{H256, H512}; -use ethrex_rlp::error::RLPDecodeError; +use ethrex_common::{H256, H512, U256, types::AccountState}; +use ethrex_rlp::{encode::RLPEncode, error::RLPDecodeError}; use ethrex_trie::Node; use keccak_hash::keccak; use secp256k1::{PublicKey, SecretKey}; @@ -73,6 +73,31 @@ pub fn get_account_storages_snapshot_file(directory: String, chunk_index: u64) - format!("{directory}/account_storages_chunk.rlp.{chunk_index}") } +#[cfg(feature = "rocksdb")] +pub fn dump_to_rocks_db(path: String, contents: Vec<(Vec, Vec)>) -> Result<(), DumpError> { + let writer_options = rocksdb::Options::default(); + let mut writer = rocksdb::SstFileWriter::create(&writer_options); + writer + .open(std::path::Path::new(&path)) + .map_err(|_| DumpError { + path: path.clone(), + contents: Vec::new(), + error: std::io::ErrorKind::Other, + })?; + for values in contents { + writer.put(values.0, values.1).map_err(|_| DumpError { + path: path.clone(), + contents: Vec::new(), + error: std::io::ErrorKind::Other, + })?; + } + writer.finish().map_err(|_| DumpError { + path: path.clone(), + contents: Vec::new(), + error: std::io::ErrorKind::Other, + }) +} + pub fn dump_to_file(path: String, contents: Vec) -> Result<(), DumpError> { std::fs::write(&path, &contents) .inspect_err(|err| { @@ -85,6 +110,26 @@ pub fn dump_to_file(path: String, contents: Vec) -> Result<(), DumpError> { }) } 
+pub fn dump_accounts_to_file( + path: String, + accounts: Vec<(H256, AccountState)>, +) -> Result<(), DumpError> { + cfg_if::cfg_if! { + if #[cfg(feature = "rocksdb")] { + dump_to_rocks_db(path, accounts.into_iter().map(|(hash, state)| (hash.0.to_vec(), state.encode_to_vec())).collect::>()) + } else { + dump_to_file(path, accounts.encode_to_vec()) + } + } +} + +pub fn dump_storages_to_file( + path: String, + storages: Vec<(H256, Vec<(H256, U256)>)>, +) -> Result<(), DumpError> { + dump_to_file(path, storages.encode_to_vec()) +} + /// TODO: make it more generic pub async fn send_message_and_wait_for_response( peer_channel: &mut PeerChannels, From 87cfc017036edcfe9ff341a5ce589cda0cf6d7a6 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 17 Sep 2025 17:23:12 -0300 Subject: [PATCH 002/115] format --- crates/networking/p2p/peer_handler.rs | 8 +++++++- crates/networking/p2p/utils.rs | 10 +++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index 2b52932715c..6bcc8af627d 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -941,6 +941,12 @@ impl PeerHandler { let path = get_account_state_snapshot_file(account_state_snapshots_dir, chunk_file); dump_accounts_to_file(path, account_state_chunk) + .inspect_err(|err| { + error!( + "We had an error dumping the last accounts to disk {}", + err.error + ) + }) .map_err(|_| PeerHandlerError::WriteStateSnapshotsDir(chunk_file))?; } @@ -1578,7 +1584,7 @@ impl PeerHandler { chunk_index, ); dump_storages_to_file(path, snapshot) - .map_err(|_| PeerHandlerError::WriteStateSnapshotsDir(chunk_index))?; + .map_err(|_| PeerHandlerError::WriteStorageSnapshotsDir(chunk_index))?; } disk_joinset .join_all() diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index 47510b9e400..08c761b1ff8 100644 --- a/crates/networking/p2p/utils.rs +++ 
b/crates/networking/p2p/utils.rs @@ -115,11 +115,11 @@ pub fn dump_accounts_to_file( accounts: Vec<(H256, AccountState)>, ) -> Result<(), DumpError> { cfg_if::cfg_if! { - if #[cfg(feature = "rocksdb")] { - dump_to_rocks_db(path, accounts.into_iter().map(|(hash, state)| (hash.0.to_vec(), state.encode_to_vec())).collect::>()) - } else { - dump_to_file(path, accounts.encode_to_vec()) - } + if #[cfg(feature = "rocksdb")] { + dump_to_rocks_db(path, accounts.into_iter().map(|(hash, state)| (hash.0.to_vec(), state.encode_to_vec())).collect::>()) + } else { + dump_to_file(path, accounts.encode_to_vec()) + } } } From b7f29e717112366c075213c164dc8ef4e20aae4c Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 17 Sep 2025 17:32:43 -0300 Subject: [PATCH 003/115] Update utils.rs --- crates/networking/p2p/utils.rs | 43 +++++++++++++++++----------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index 08c761b1ff8..eb6fb90ff73 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -10,12 +10,12 @@ use keccak_hash::keccak; use secp256k1::{PublicKey, SecretKey}; use spawned_concurrency::error::GenServerError; +use crate::peer_handler::DumpError; use crate::{ kademlia::PeerChannels, rlpx::{Message, connection::server::CastMessage, snap::TrieNodes}, }; - -use crate::peer_handler::DumpError; +use tracing::error; /// Computes the node_id from a public key (aka computes the Keccak256 hash of the given public key) pub fn node_id(public_key: &H512) -> H256 { @@ -74,28 +74,17 @@ pub fn get_account_storages_snapshot_file(directory: String, chunk_index: u64) - } #[cfg(feature = "rocksdb")] -pub fn dump_to_rocks_db(path: String, contents: Vec<(Vec, Vec)>) -> Result<(), DumpError> { +pub fn dump_to_rocks_db( + path: String, + contents: Vec<(Vec, Vec)>, +) -> Result<(), rocksdb::Error> { let writer_options = rocksdb::Options::default(); let mut writer = 
rocksdb::SstFileWriter::create(&writer_options); - writer - .open(std::path::Path::new(&path)) - .map_err(|_| DumpError { - path: path.clone(), - contents: Vec::new(), - error: std::io::ErrorKind::Other, - })?; + writer.open(std::path::Path::new(&path))?; for values in contents { - writer.put(values.0, values.1).map_err(|_| DumpError { - path: path.clone(), - contents: Vec::new(), - error: std::io::ErrorKind::Other, - })?; + writer.put(values.0, values.1)?; } - writer.finish().map_err(|_| DumpError { - path: path.clone(), - contents: Vec::new(), - error: std::io::ErrorKind::Other, - }) + writer.finish() } pub fn dump_to_file(path: String, contents: Vec) -> Result<(), DumpError> { @@ -116,7 +105,19 @@ pub fn dump_accounts_to_file( ) -> Result<(), DumpError> { cfg_if::cfg_if! { if #[cfg(feature = "rocksdb")] { - dump_to_rocks_db(path, accounts.into_iter().map(|(hash, state)| (hash.0.to_vec(), state.encode_to_vec())).collect::>()) + dump_to_rocks_db( + path.clone(), + accounts + .into_iter() + .map(|(hash, state)| (hash.0.to_vec(), state.encode_to_vec()) + ).collect::>() + ) + .inspect_err(|err| error!("Rocksdb writing stt error {err:?}")) + .map_err(|_| DumpError { + path, + contents: Vec::new(), + error: std::io::ErrorKind::Other, + }) } else { dump_to_file(path, accounts.encode_to_vec()) } From 72f00b72b2158bebca7cb3dadf59e3cdfd4a38a1 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 17 Sep 2025 17:37:22 -0300 Subject: [PATCH 004/115] compare --- crates/networking/p2p/utils.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index eb6fb90ff73..b4c502e1e12 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -76,8 +76,9 @@ pub fn get_account_storages_snapshot_file(directory: String, chunk_index: u64) - #[cfg(feature = "rocksdb")] pub fn dump_to_rocks_db( path: String, - contents: Vec<(Vec, Vec)>, + mut contents: Vec<(Vec, Vec)>, ) -> 
Result<(), rocksdb::Error> { + contents.sort(); let writer_options = rocksdb::Options::default(); let mut writer = rocksdb::SstFileWriter::create(&writer_options); writer.open(std::path::Path::new(&path))?; From 999115df690ed2911a8637ed34508c8f6ee4c438 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 17 Sep 2025 17:48:59 -0300 Subject: [PATCH 005/115] moved function outside --- crates/networking/p2p/sync.rs | 101 +++++++++++++++++----------------- 1 file changed, 50 insertions(+), 51 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index c53e71ec855..aa9f64b2d0d 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -854,58 +854,9 @@ impl Syncer { *METRICS.account_tries_insert_start_time.lock().await = Some(SystemTime::now()); // We read the account leafs from the files in account_state_snapshots_dir, write it into // the trie to compute the nodes and stores the accounts with storages for later use - let mut computed_state_root = *EMPTY_TRIE_HASH; - for entry in std::fs::read_dir(&account_state_snapshots_dir) - .map_err(|_| SyncError::AccountStateSnapshotsDirNotFound)? 
- { - *METRICS.current_step.lock().await = "Inserting Account Ranges".to_string(); - let entry = entry.map_err(|err| { - SyncError::SnapshotReadError(account_state_snapshots_dir.clone().into(), err) - })?; - info!("Reading account file from entry {entry:?}"); - let snapshot_path = entry.path(); - let snapshot_contents = std::fs::read(&snapshot_path) - .map_err(|err| SyncError::SnapshotReadError(snapshot_path.clone(), err))?; - let account_states_snapshot: Vec<(H256, AccountState)> = - RLPDecode::decode(&snapshot_contents) - .map_err(|_| SyncError::SnapshotDecodeError(snapshot_path.clone()))?; - - let (account_hashes, account_states): (Vec, Vec) = - account_states_snapshot.iter().cloned().unzip(); - - storage_accounts.accounts_with_storage_root.extend( - account_hashes - .iter() - .zip(account_states.iter()) - .filter_map(|(hash, state)| { - (state.storage_root != *EMPTY_TRIE_HASH) - .then_some((*hash, state.storage_root)) - }), - ); - - info!("Inserting accounts into the state trie"); - - let store_clone = store.clone(); - let current_state_root = - tokio::task::spawn_blocking(move || -> Result { let mut trie = store_clone.open_state_trie(computed_state_root)?; - - for (account_hash, account) in account_states_snapshot { - METRICS - .account_tries_inserted - .fetch_add(1, Ordering::Relaxed); - trie.insert(account_hash.0.to_vec(), account.encode_to_vec())?; - } - *METRICS.current_step.blocking_lock() = - "Inserting Account Ranges - \x1b[31mWriting to DB\x1b[0m".to_string(); - let current_state_root = trie.hash()?; - Ok(current_state_root) - }) - .await??; - - computed_state_root = current_state_root; - } - + let computed_state_root = + insert_accounts_into_db(store.clone(), &mut storage_accounts).await?; info!( "Finished inserting account ranges, total storage accounts: {}", storage_accounts.accounts_with_storage_root.len() @@ -1388,3 +1339,51 @@ where Some(bytecode_iter.by_ref().take(BYTECODE_CHUNK_SIZE).collect()) .filter(|chunk: &Vec<_>| !chunk.is_empty()) } + 
+async fn insert_accounts_into_db( + store: Store, + storage_accounts: &mut AccountStorageRoots, + account_state_snapshots_dir: &str, +) -> Result { + let mut computed_state_root = *EMPTY_TRIE_HASH; + for entry in std::fs::read_dir(account_state_snapshots_dir) + .map_err(|_| SyncError::AccountStateSnapshotsDirNotFound)? + { + let entry = entry.map_err(|err| { + SyncError::SnapshotReadError(account_state_snapshots_dir.clone().into(), err) + })?; + info!("Reading account file from entry {entry:?}"); + let snapshot_path = entry.path(); + let snapshot_contents = std::fs::read(&snapshot_path) + .map_err(|err| SyncError::SnapshotReadError(snapshot_path.clone(), err))?; + let account_states_snapshot: Vec<(H256, AccountState)> = + RLPDecode::decode(&snapshot_contents) + .map_err(|_| SyncError::SnapshotDecodeError(snapshot_path.clone()))?; + + storage_accounts.accounts_with_storage_root.extend( + account_states_snapshot.iter().filter_map(|(hash, state)| { + (state.storage_root != *EMPTY_TRIE_HASH).then_some((*hash, state.storage_root)) + }), + ); + + info!("Inserting accounts into the state trie"); + + let store_clone = store.clone(); + let current_state_root: Result = + tokio::task::spawn_blocking(move || -> Result { + let mut trie = store_clone.open_state_trie(computed_state_root)?; + + for (account_hash, account) in account_states_snapshot { + trie.insert(account_hash.0.to_vec(), account.encode_to_vec())?; + } + info!("Comitting to disk"); + let current_state_root = trie.hash()?; + Ok(current_state_root) + }) + .await?; + + computed_state_root = current_state_root?; + } + info!("computed_state_root {computed_state_root}"); + Ok(computed_state_root) +} From 238477286494d383db550d60d087a2ee6cada786 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 17 Sep 2025 17:49:59 -0300 Subject: [PATCH 006/115] Update sync.rs --- crates/networking/p2p/sync.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/networking/p2p/sync.rs 
b/crates/networking/p2p/sync.rs index aa9f64b2d0d..a5bcb08796b 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -854,9 +854,12 @@ impl Syncer { *METRICS.account_tries_insert_start_time.lock().await = Some(SystemTime::now()); // We read the account leafs from the files in account_state_snapshots_dir, write it into // the trie to compute the nodes and stores the accounts with storages for later use - let mut trie = store_clone.open_state_trie(computed_state_root)?; - let computed_state_root = - insert_accounts_into_db(store.clone(), &mut storage_accounts).await?; + let computed_state_root = insert_accounts_into_db( + store.clone(), + &mut storage_accounts, + &account_state_snapshots_dir, + ) + .await?; info!( "Finished inserting account ranges, total storage accounts: {}", storage_accounts.accounts_with_storage_root.len() From ae963c8380905b2862649306c791b0d6f1d8b9d1 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 17 Sep 2025 17:52:34 -0300 Subject: [PATCH 007/115] Update Cargo.toml --- crates/networking/p2p/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/networking/p2p/Cargo.toml b/crates/networking/p2p/Cargo.toml index 9390e7e3de6..0b7999f32b6 100644 --- a/crates/networking/p2p/Cargo.toml +++ b/crates/networking/p2p/Cargo.toml @@ -55,7 +55,7 @@ hex-literal = "0.4.1" path = "./p2p.rs" [features] -default = ["c-kzg", "rocksdb"] +default = ["c-kzg"] c-kzg = ["ethrex-blockchain/c-kzg", "ethrex-common/c-kzg"] sync-test = [] rocksdb = ["dep:rocksdb"] From b60cd753da8e7bb04c0f0f382d2242f3217844d8 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 17 Sep 2025 17:55:58 -0300 Subject: [PATCH 008/115] Update Cargo.toml --- cmd/ethrex/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/ethrex/Cargo.toml b/cmd/ethrex/Cargo.toml index 02ca0802c17..49d621a41ce 100644 --- a/cmd/ethrex/Cargo.toml +++ b/cmd/ethrex/Cargo.toml @@ -61,7 +61,7 @@ path = 
"./lib.rs" [features] debug = ["ethrex-vm/debug"] -default = ["libmdbx", "c-kzg", "rollup_storage_sql", "dev", "metrics", "rocksdb"] +default = ["libmdbx", "c-kzg", "rollup_storage_sql", "dev", "metrics"] dev = ["dep:ethrex-dev"] c-kzg = [ "ethrex-vm/c-kzg", From 29c4b4cbd3a20b88a57daf0d323ccf65f290e56f Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 17 Sep 2025 19:19:19 -0300 Subject: [PATCH 009/115] Update sync.rs --- crates/networking/p2p/sync.rs | 116 +++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 50 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index a5bcb08796b..d7b72a21f54 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -944,54 +944,14 @@ impl Syncer { *METRICS.current_step.lock().await = "Inserting Storage Ranges - \x1b[31mWriting to DB\x1b[0m".to_string(); let account_storages_snapshots_dir = get_account_storages_snapshots_dir(&self.datadir); - for entry in std::fs::read_dir(&account_storages_snapshots_dir) - .map_err(|_| SyncError::AccountStoragesSnapshotsDirNotFound)? 
- { - let entry = entry.map_err(|err| { - SyncError::SnapshotReadError(account_storages_snapshots_dir.clone().into(), err) - })?; - info!("Reading account storage file from entry {entry:?}"); - - let snapshot_path = entry.path(); - - let snapshot_contents = std::fs::read(&snapshot_path) - .map_err(|err| SyncError::SnapshotReadError(snapshot_path.clone(), err))?; - - let account_storages_snapshot: Vec<(H256, Vec<(H256, U256)>)> = - RLPDecode::decode(&snapshot_contents) - .map_err(|_| SyncError::SnapshotDecodeError(snapshot_path.clone()))?; - - let maybe_big_account_storage_state_roots_clone = - maybe_big_account_storage_state_roots.clone(); - let store_clone = store.clone(); - let pivot_hash_moved = pivot_header.hash(); - info!("Starting compute of account_storages_snapshot"); - let storage_trie_node_changes = tokio::task::spawn_blocking(move || { - let store: Store = store_clone; - - // TODO: Here we are filtering again the account with empty storage because we are adding empty accounts on purpose (it was the easiest thing to do) - // We need to fix this issue in request_storage_ranges and remove this filter. 
- account_storages_snapshot - .into_par_iter() - .filter(|(_account_hash, storage)| !storage.is_empty()) - .map(|(account_hash, key_value_pairs)| { - compute_storage_roots( - maybe_big_account_storage_state_roots_clone.clone(), - store.clone(), - account_hash, - key_value_pairs, - pivot_hash_moved, - ) - }) - .collect::, SyncError>>() - }) - .await??; - info!("Writing to db"); + insert_storages_into_db( + store.clone(), + &account_storages_snapshots_dir, + &maybe_big_account_storage_state_roots, + &pivot_header, + ) + .await?; - store - .write_storage_trie_nodes_batch(storage_trie_node_changes) - .await?; - } *METRICS.storage_tries_insert_end_time.lock().await = Some(SystemTime::now()); info!("Finished storing storage tries"); @@ -1352,9 +1312,8 @@ async fn insert_accounts_into_db( for entry in std::fs::read_dir(account_state_snapshots_dir) .map_err(|_| SyncError::AccountStateSnapshotsDirNotFound)? { - let entry = entry.map_err(|err| { - SyncError::SnapshotReadError(account_state_snapshots_dir.clone().into(), err) - })?; + let entry = entry + .map_err(|err| SyncError::SnapshotReadError(account_state_snapshots_dir.into(), err))?; info!("Reading account file from entry {entry:?}"); let snapshot_path = entry.path(); let snapshot_contents = std::fs::read(&snapshot_path) @@ -1390,3 +1349,60 @@ async fn insert_accounts_into_db( info!("computed_state_root {computed_state_root}"); Ok(computed_state_root) } + +async fn insert_storages_into_db( + store: Store, + account_storages_snapshots_dir: &str, + maybe_big_account_storage_state_roots: &Arc>>, + pivot_header: &BlockHeader, +) -> Result<(), SyncError> { + for entry in std::fs::read_dir(account_storages_snapshots_dir) + .map_err(|_| SyncError::AccountStoragesSnapshotsDirNotFound)? 
+ { + let entry = entry.map_err(|err| { + SyncError::SnapshotReadError(account_storages_snapshots_dir.into(), err) + })?; + info!("Reading account storage file from entry {entry:?}"); + + let snapshot_path = entry.path(); + + let snapshot_contents = std::fs::read(&snapshot_path) + .map_err(|err| SyncError::SnapshotReadError(snapshot_path.clone(), err))?; + + let account_storages_snapshot: Vec<(H256, Vec<(H256, U256)>)> = + RLPDecode::decode(&snapshot_contents) + .map_err(|_| SyncError::SnapshotDecodeError(snapshot_path.clone()))?; + + let maybe_big_account_storage_state_roots_clone = + maybe_big_account_storage_state_roots.clone(); + let store_clone = store.clone(); + let pivot_hash_moved = pivot_header.hash(); + info!("Starting compute of account_storages_snapshot"); + let storage_trie_node_changes = tokio::task::spawn_blocking(move || { + let store: Store = store_clone; + + // TODO: Here we are filtering again the account with empty storage because we are adding empty accounts on purpose (it was the easiest thing to do) + // We need to fix this issue in request_storage_ranges and remove this filter. 
+ account_storages_snapshot + .into_par_iter() + .filter(|(_account_hash, storage)| !storage.is_empty()) + .map(|(account_hash, key_value_pairs)| { + compute_storage_roots( + maybe_big_account_storage_state_roots_clone.clone(), + store.clone(), + account_hash, + key_value_pairs, + pivot_hash_moved, + ) + }) + .collect::, SyncError>>() + }) + .await??; + info!("Writing to db"); + + store + .write_storage_trie_nodes_batch(storage_trie_node_changes) + .await?; + } + Ok(()) +} From 287da4ea9199cff32a5f29492bc7309ae83d199f Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 18 Sep 2025 14:50:31 -0300 Subject: [PATCH 010/115] Sorted trie --- Cargo.lock | 1 + cmd/ethrex/Cargo.toml | 2 +- crates/common/trie/Cargo.toml | 1 + crates/common/trie/nibbles.rs | 2 +- crates/common/trie/node/branch.rs | 2 +- crates/common/trie/trie.rs | 1 + crates/common/trie/trie_sorted.rs | 406 ++++++++++++++++++++++++++++++ crates/networking/p2p/utils.rs | 24 +- 8 files changed, 435 insertions(+), 4 deletions(-) create mode 100644 crates/common/trie/trie_sorted.rs diff --git a/Cargo.lock b/Cargo.lock index acc0f3ba3fc..9e8a2dde23e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4583,6 +4583,7 @@ dependencies = [ "libmdbx", "proptest", "rand 0.8.5", + "rayon", "rocksdb", "serde", "serde_json", diff --git a/cmd/ethrex/Cargo.toml b/cmd/ethrex/Cargo.toml index 49d621a41ce..02ca0802c17 100644 --- a/cmd/ethrex/Cargo.toml +++ b/cmd/ethrex/Cargo.toml @@ -61,7 +61,7 @@ path = "./lib.rs" [features] debug = ["ethrex-vm/debug"] -default = ["libmdbx", "c-kzg", "rollup_storage_sql", "dev", "metrics"] +default = ["libmdbx", "c-kzg", "rollup_storage_sql", "dev", "metrics", "rocksdb"] dev = ["dep:ethrex-dev"] c-kzg = [ "ethrex-vm/c-kzg", diff --git a/crates/common/trie/Cargo.toml b/crates/common/trie/Cargo.toml index 527e5ebc4d9..c4604ab4b26 100644 --- a/crates/common/trie/Cargo.toml +++ b/crates/common/trie/Cargo.toml @@ -20,6 +20,7 @@ rocksdb = { workspace = true, optional = true } smallvec = { 
version = "1.10.0", features = ["const_generics", "union"] } digest = "0.10.6" lazy_static.workspace = true +rayon.workspace = true [features] default = [] diff --git a/crates/common/trie/nibbles.rs b/crates/common/trie/nibbles.rs index cc3aaf61504..4ca0bff05b5 100644 --- a/crates/common/trie/nibbles.rs +++ b/crates/common/trie/nibbles.rs @@ -10,7 +10,7 @@ use ethrex_rlp::{ /// Struct representing a list of nibbles (half-bytes) #[derive(Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord)] pub struct Nibbles { - data: Vec, + pub(crate) data: Vec, } impl std::hash::Hash for Nibbles { diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 685930b8170..521ebdd60fc 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -6,7 +6,7 @@ use super::{ExtensionNode, LeafNode, Node, NodeRef, ValueOrHash}; /// Branch Node of an an Ethereum Compatible Patricia Merkle Trie /// Contains the node's value and the hash of its children nodes -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Default, Clone, PartialEq)] pub struct BranchNode { pub choices: [NodeRef; 16], pub value: ValueRLP, diff --git a/crates/common/trie/trie.rs b/crates/common/trie/trie.rs index 6583770f67c..12e75b43d5e 100644 --- a/crates/common/trie/trie.rs +++ b/crates/common/trie/trie.rs @@ -8,6 +8,7 @@ mod rlp; #[cfg(test)] mod test_utils; mod trie_iter; +pub mod trie_sorted; mod verify_range; use ethereum_types::H256; use ethrex_rlp::constants::RLP_NULL; diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs new file mode 100644 index 00000000000..3f5d47d42d0 --- /dev/null +++ b/crates/common/trie/trie_sorted.rs @@ -0,0 +1,406 @@ +use std::thread::{Scope, scope}; + +use crate::{ + Nibbles, Node, TrieDB, TrieError, + node::{BranchNode, ExtensionNode, LeafNode}, +}; +use ethereum_types::H256; +use ethrex_rlp::encode::RLPEncode; +use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; +use tracing::debug; + 
+#[derive(Debug, Default, Clone)] +struct StackElement { + path: Nibbles, + element: BranchNode, +} + +#[derive(Debug, Clone)] +enum CenterSideElement { + Branch { node: BranchNode }, + Leaf { value: Vec }, +} + +#[derive(Debug, Clone)] +struct CenterSide { + path: Nibbles, + element: CenterSideElement, +} + +#[derive(Debug, thiserror::Error)] +pub enum TrieGenerationError { + #[error("When creating a child node, the nibbles diff was empty. Child Node {0:x?}")] + IndexNotFound(Nibbles), + #[error("When popping from the trie stack it was empty. Current position: {0:x?}")] + TrieStackEmpty(Nibbles), + #[error(transparent)] + FlushToDbError(TrieError), + #[error("When joining the write threads, error")] + ThreadJoinError(), +} + +const SIZE_TO_WRITE_DB: u64 = 20_000; + +impl CenterSide { + fn from_value(tuple: (H256, Vec)) -> CenterSide { + CenterSide { + path: Nibbles::from_raw(&tuple.0.0, true), + element: CenterSideElement::Leaf { value: tuple.1 }, + } + } + fn from_stack_element(element: StackElement) -> CenterSide { + CenterSide { + path: element.path, + element: CenterSideElement::Branch { + node: element.element, + }, + } + } +} + +fn is_child(this: &Nibbles, other: &StackElement) -> bool { + this.count_prefix(&other.path) == other.path.len() +} + +fn create_parent(center_side: &CenterSide, closest_nibbles: &Nibbles) -> StackElement { + let new_parent_nibbles = center_side + .path + .slice(0, center_side.path.count_prefix(closest_nibbles)); + StackElement { + path: new_parent_nibbles, + element: BranchNode { + choices: BranchNode::EMPTY_CHOICES, + value: vec![], + }, + } +} + +fn add_center_to_parent_and_write_queue( + nodes_to_write: &mut Vec, + center_side: &CenterSide, + parent_element: &mut StackElement, +) -> Result<(), TrieGenerationError> { + debug!("{:x?}", center_side.path); + debug!("{:x?}", parent_element.path); + let mut path = center_side.path.clone(); + path.skip_prefix(&parent_element.path); + let index = path + .next() + 
.ok_or(TrieGenerationError::IndexNotFound(center_side.path.clone()))?; + let node: Node = match ¢er_side.element { + CenterSideElement::Branch { node } => { + if path.is_empty() { + node.clone().into() + } else { + let hash = node.compute_hash(); + nodes_to_write.push(node.clone().into()); + ExtensionNode { + prefix: path, + child: hash.into(), + } + .into() + } + } + CenterSideElement::Leaf { value } => LeafNode { + partial: path, + value: value.clone(), + } + .into(), + }; + parent_element.element.choices[index as usize] = node.compute_hash().into(); + debug!( + "branch {:x?}", + parent_element + .element + .choices + .iter() + .enumerate() + .filter_map(|(index, child)| child.is_valid().then_some(index)) + .collect::>() + ); + nodes_to_write.push(node); + Ok(()) +} + +fn flush_nodes_to_write( + nodes_to_write: Vec, + db: &dyn TrieDB, +) -> Result<(), TrieGenerationError> { + db.put_batch( + nodes_to_write + .par_iter() + .map(|node| (node.compute_hash(), node.encode_to_vec())) + .collect(), + ) + .map_err(TrieGenerationError::FlushToDbError) +} + +#[inline(never)] +pub fn trie_from_sorted_accounts<'scope, T>( + db: &'scope dyn TrieDB, + accounts_iter: &mut T, + scoped_thread: &'scope Scope<'scope, '_>, +) -> Result +where + T: Iterator)>, +{ + let mut nodes_to_write: Vec = Vec::with_capacity(SIZE_TO_WRITE_DB as usize + 65); + let mut trie_stack: Vec = Vec::with_capacity(64); // Optimized for H256 + let mut write_threads = Vec::new(); + + let mut left_side = StackElement::default(); + let mut center_side: CenterSide = CenterSide::from_value(accounts_iter.next().unwrap()); + let mut right_side_opt: Option<(H256, Vec)> = accounts_iter.next(); + + while let Some(right_side) = right_side_opt { + if nodes_to_write.len() as u64 > SIZE_TO_WRITE_DB { + write_threads + .push(scoped_thread.spawn(move || flush_nodes_to_write(nodes_to_write, db))); + nodes_to_write = Vec::with_capacity(SIZE_TO_WRITE_DB as usize + 65); + } + + let right_side_path = 
Nibbles::from_bytes(right_side.0.as_bytes()); + while !is_child(&right_side_path, &left_side) { + add_center_to_parent_and_write_queue( + &mut nodes_to_write, + ¢er_side, + &mut left_side, + )?; + let temp = CenterSide::from_stack_element(left_side); + left_side = trie_stack.pop().ok_or(TrieGenerationError::TrieStackEmpty( + center_side.path.clone(), + ))?; + center_side = temp; + } + + if center_side.path.count_prefix(&left_side.path) + >= center_side.path.count_prefix(&right_side_path) + { + add_center_to_parent_and_write_queue( + &mut nodes_to_write, + ¢er_side, + &mut left_side, + )?; + } else { + let mut element = create_parent(¢er_side, &right_side_path); + add_center_to_parent_and_write_queue(&mut nodes_to_write, ¢er_side, &mut element)?; + trie_stack.push(left_side); + left_side = element; + } + center_side = CenterSide::from_value(right_side); + right_side_opt = accounts_iter.next(); + } + + while !is_child(¢er_side.path, &left_side) { + let temp = CenterSide::from_stack_element(left_side); + left_side = trie_stack.pop().ok_or(TrieGenerationError::TrieStackEmpty( + center_side.path.clone(), + ))?; + add_center_to_parent_and_write_queue(&mut nodes_to_write, &temp, &mut left_side)?; + } + + add_center_to_parent_and_write_queue(&mut nodes_to_write, ¢er_side, &mut left_side)?; + + while let Some(mut parent_node) = trie_stack.pop() { + add_center_to_parent_and_write_queue( + &mut nodes_to_write, + &CenterSide::from_stack_element(left_side), + &mut parent_node, + )?; + left_side = parent_node; + } + + let hash = if left_side + .element + .choices + .iter() + .filter(|choice| choice.is_valid()) + .count() + == 1 + { + let (index, child) = left_side + .element + .choices + .into_iter() + .enumerate() + .find(|(_, child)| child.is_valid()) + .unwrap(); + + debug_assert!(nodes_to_write.last().unwrap().compute_hash() == child.compute_hash()); + match nodes_to_write.iter_mut().last().unwrap() { + Node::Branch(_) => { + nodes_to_write.push( + ExtensionNode { + prefix: 
Nibbles::from_hex(vec![index as u8]), + child, + } + .into(), + ); + nodes_to_write + .last() + .expect("we just inserted") + .compute_hash() + .finalize() + } + Node::Extension(extension_node) => { + extension_node.prefix.data.insert(0, index as u8); + extension_node.compute_hash().finalize() + } + Node::Leaf(leaf_node) => leaf_node.compute_hash().finalize(), + } + } else { + nodes_to_write.push(left_side.element.into()); + nodes_to_write + .last() + .expect("we just inserted") + .compute_hash() + .finalize() + }; + + write_threads.push(scoped_thread.spawn(move || flush_nodes_to_write(nodes_to_write, db))); + write_threads + .into_iter() + .flat_map(|thread| thread.join()) + .collect::>()?; + + Ok(hash) +} + +fn trie_from_sorted_accounts_wrap( + db: &dyn TrieDB, + accounts_iter: &mut T, +) -> Result +where + T: Iterator)>, +{ + scope(move |s| trie_from_sorted_accounts(db, accounts_iter, s)) +} + +#[cfg(test)] +mod test { + use ethereum_types::U256; + + use crate::Trie; + + use super::*; + use std::{collections::BTreeMap, str::FromStr}; + + fn generate_input_1() -> BTreeMap> { + let mut accounts: BTreeMap> = BTreeMap::new(); + for string in [ + "68521f7430502aef983fd7568ea179ed0f8d12d5b68883c90573781ae0778ec2", + "68db10f720d5972738df0d841d64c7117439a1a2ca9ba247e7239b19eb187414", + "6b7c1458952b903dbe3717bc7579f18e5cb1136be1b11b113cdac0f0791c07d3", + ] { + accounts.insert(H256::from_str(string).unwrap(), vec![0, 1]); + } + accounts + } + + fn generate_input_2() -> BTreeMap> { + let mut accounts: BTreeMap> = BTreeMap::new(); + for string in [ + "0532f23d3bd5277790ece5a6cb6fc684bc473a91ffe3a0334049527c4f6987e9", + "14d5df819167b77851220ee266178aee165daada67ca865e9d50faed6b4fdbe3", + "6908aa86b715fcf221f208a28bb84bf6359ba9c41da04b7e17a925cdb22bf704", + "90bbe47533cd80b5d9cef6c283415edd90296bf4ac4ede6d2a6b42bb3d5e7d0e", + "90c2fdad333366cf0f18f0dded9b478590c0563e4c847c79aee0b733b5a9104f", + "af9e3efce873619102dfdb0504abd44179191bccfb624608961e71492a1ba5b7", + 
"b723d5841dc4d6d3fe7de03ad74dd83798c3b68f752bba29c906ec7f5a469452", + "c2c6fd64de59489f0c27e75443c24327cef6415f1d3ee1659646abefab212113", + "ca0d791e7a3e0f25d775034acecbaaf9219939288e6282d8291e181b9c3c24b0", + "f0dcaaa40dfc67925d6e172e48b8f83954ba46cfb1bb522c809f3b93b49205ee", + ] { + accounts.insert(H256::from_str(string).unwrap(), vec![0, 1]); + } + accounts + } + + fn generate_input_3() -> BTreeMap> { + let mut accounts: BTreeMap> = BTreeMap::new(); + for string in [ + "0532f23d3bd5277790ece5a6cb6fc684bc473a91ffe3a0334049527c4f6987e9", + "0542f23d3bd5277790ece5a6cb6fc684bc473a91ffe3a0334049527c4f6987e9", + "0552f23d3bd5277790ece5a6cb6fc684bc473a91ffe3a0334049527c4f6987e9", + ] { + accounts.insert(H256::from_str(string).unwrap(), vec![0, 1]); + } + accounts + } + + fn generate_input_slots_1() -> BTreeMap { + let mut slots: BTreeMap = BTreeMap::new(); + for string in [ + "0532f23d3bd5277790ece5a6cb6fc684bc473a91ffe3a0334049527c4f6987e8", + "0532f23d3bd5277790ece5a6cb6fc684bc473a91ffe3a0334049527c4f6987e9", + "0552f23d3bd5277790ece5a6cb6fc684bc473a91ffe3a0334049527c4f6987e9", + ] { + slots.insert(H256::from_str(string).unwrap(), U256::zero()); + } + slots + } + + pub fn run_test_account_state(accounts: BTreeMap>) { + let trie = Trie::stateless(); + let db = trie.db(); + let tested_trie_hash: H256 = trie_from_sorted_accounts_wrap( + db, + &mut accounts + .clone() + .into_iter() + .map(|(hash, state)| (hash, state.encode_to_vec())), + ) + .expect("Shouldn't have errors"); + + let mut trie: Trie = Trie::empty_in_memory(); + for account in accounts.iter() { + trie.insert(account.0.as_bytes().to_vec(), account.1.encode_to_vec()) + .unwrap(); + } + + assert!(tested_trie_hash == trie.hash_no_commit()) + } + + pub fn run_test_storage_slots(slots: BTreeMap) { + let trie = Trie::stateless(); + let db = trie.db(); + let tested_trie_hash: H256 = trie_from_sorted_accounts_wrap( + db, + &mut slots + .clone() + .into_iter() + .map(|(hash, state)| (hash, state.encode_to_vec())), + 
) + .expect("Shouldn't have errors"); + + let mut trie: Trie = Trie::empty_in_memory(); + for account in slots.iter() { + trie.insert(account.0.as_bytes().to_vec(), account.1.encode_to_vec()) + .unwrap(); + } + + let trie_hash = trie.hash_no_commit(); + + assert!(tested_trie_hash == trie_hash) + } + + #[test] + fn test_1() { + run_test_account_state(generate_input_1()); + } + + #[test] + fn test_2() { + run_test_account_state(generate_input_2()); + } + + #[test] + fn test_3() { + run_test_account_state(generate_input_3()); + } + + #[test] + fn test_slots_1() { + run_test_storage_slots(generate_input_slots_1()); + } +} diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index b4c502e1e12..690b4910f1d 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -129,7 +129,29 @@ pub fn dump_storages_to_file( path: String, storages: Vec<(H256, Vec<(H256, U256)>)>, ) -> Result<(), DumpError> { - dump_to_file(path, storages.encode_to_vec()) + cfg_if::cfg_if! 
{ + if #[cfg(feature = "rocksdb")] { + dump_to_rocks_db( + path.clone(), + storages + .into_iter() + .flat_map(|(hash, slots)| { + slots.into_iter().map(move |(slot_hash, slot_value)| { + let key = [hash.as_bytes(), slot_hash.as_bytes()].concat(); + (key, slot_value.encode_to_vec()) + }) + }).collect::>() + ) + .inspect_err(|err| error!("Rocksdb writing stt error {err:?}")) + .map_err(|_| DumpError { + path, + contents: Vec::new(), + error: std::io::ErrorKind::Other, + }) + } else { + dump_to_file(path, storages.encode_to_vec()) + } + } } /// TODO: make it more generic From dded7b72c5631ed83915c8aa18a934fc53eb691a Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 18 Sep 2025 17:58:27 -0300 Subject: [PATCH 011/115] Compiles --- crates/common/trie/trie_sorted.rs | 2 +- crates/networking/p2p/sync.rs | 144 ++++++++++++++++++++++++++---- crates/networking/p2p/utils.rs | 8 ++ 3 files changed, 136 insertions(+), 18 deletions(-) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index 3f5d47d42d0..091dec12472 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -266,7 +266,7 @@ where Ok(hash) } -fn trie_from_sorted_accounts_wrap( +pub fn trie_from_sorted_accounts_wrap( db: &dyn TrieDB, accounts_iter: &mut T, ) -> Result diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index d7b72a21f54..5b6c40204e7 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -7,6 +7,7 @@ use crate::sync::state_healing::heal_state_trie_wrap; use crate::sync::storage_healing::heal_storage_trie; use crate::utils::{ current_unix_time, get_account_state_snapshots_dir, get_account_storages_snapshots_dir, + get_rocksdb_temp_accounts_dir, get_rocksdb_temp_storage_dir, }; use crate::{ metrics::METRICS, @@ -20,9 +21,10 @@ use ethrex_common::{ }; use ethrex_rlp::{decode::RLPDecode, encode::RLPEncode, error::RLPDecodeError}; use ethrex_storage::{EngineType, 
STATE_TRIE_SEGMENTS, Store, error::StoreError}; +use ethrex_trie::trie_sorted::TrieGenerationError; use ethrex_trie::{NodeHash, Trie, TrieError}; use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator}; -use std::collections::{BTreeMap, HashSet}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::path::PathBuf; use std::time::SystemTime; use std::{ @@ -854,12 +856,23 @@ impl Syncer { *METRICS.account_tries_insert_start_time.lock().await = Some(SystemTime::now()); // We read the account leafs from the files in account_state_snapshots_dir, write it into // the trie to compute the nodes and stores the accounts with storages for later use - let computed_state_root = insert_accounts_into_db( - store.clone(), - &mut storage_accounts, - &account_state_snapshots_dir, - ) - .await?; + cfg_if::cfg_if! { + if #[cfg(feature = "rocksdb")] { + let computed_state_root = insert_accounts_into_rocksdb( + store.clone(), + &mut storage_accounts, + &account_state_snapshots_dir, + &get_rocksdb_temp_accounts_dir(&self.datadir) + ).await?; + let accounts_with_storage: BTreeSet = BTreeSet::from_iter(storage_accounts.accounts_with_storage_root.keys().into_iter().map(|k| *k)); + } else { + let computed_state_root = insert_accounts_into_db( + store.clone(), + &mut storage_accounts, + &account_state_snapshots_dir, + ).await?; + } + } info!( "Finished inserting account ranges, total storage accounts: {}", storage_accounts.accounts_with_storage_root.len() @@ -937,20 +950,31 @@ impl Syncer { ); *METRICS.storage_tries_download_end_time.lock().await = Some(SystemTime::now()); - let maybe_big_account_storage_state_roots: Arc>> = - Arc::new(Mutex::new(HashMap::new())); - *METRICS.storage_tries_insert_start_time.lock().await = Some(SystemTime::now()); *METRICS.current_step.lock().await = "Inserting Storage Ranges - \x1b[31mWriting to DB\x1b[0m".to_string(); let account_storages_snapshots_dir = get_account_storages_snapshots_dir(&self.datadir); - insert_storages_into_db( - 
store.clone(), - &account_storages_snapshots_dir, - &maybe_big_account_storage_state_roots, - &pivot_header, - ) - .await?; + + cfg_if::cfg_if! { + if #[cfg(feature = "rocksdb")] { + insert_storage_into_rocksdb( + store.clone(), + accounts_with_storage, + &account_storages_snapshots_dir, + &get_rocksdb_temp_storage_dir(&self.datadir) + ).await?; + } else { + let maybe_big_account_storage_state_roots: Arc>> = + Arc::new(Mutex::new(HashMap::new())); + insert_storages_into_db( + store.clone(), + &account_storages_snapshots_dir, + &maybe_big_account_storage_state_roots, + &pivot_header, + ) + .await?; + } + } *METRICS.storage_tries_insert_end_time.lock().await = Some(SystemTime::now()); @@ -1231,6 +1255,14 @@ pub enum SyncError { PeerHandler(#[from] PeerHandlerError), #[error("Corrupt Path")] CorruptPath, + #[error("Sorted Trie Generation Error: {0}")] + TrieGenerationError(#[from] TrieGenerationError), + #[error("Failed to get account temp db directory")] + AccountTempDBDirNotFound, + #[error("Failed to get storage temp db directory")] + StorageTempDBDirNotFound, + #[error("RocksDB Error: {0}")] + RocksDBError(String), } impl From> for SyncError { @@ -1406,3 +1438,81 @@ async fn insert_storages_into_db( } Ok(()) } + +#[cfg(feature = "rocksdb")] +async fn insert_accounts_into_rocksdb( + store: Store, + storage_accounts: &mut AccountStorageRoots, + account_state_snapshots_dir: &str, + temp_db_dir: &str, +) -> Result { + use ethrex_trie::trie_sorted::trie_from_sorted_accounts_wrap; + + let trie = store.open_state_trie(*EMPTY_TRIE_HASH)?; + let mut db_options = rocksdb::Options::default(); + db_options.create_if_missing(true); + let db = rocksdb::DB::open(&db_options, temp_db_dir) + .map_err(|_| SyncError::AccountTempDBDirNotFound)?; + let file_paths: Vec = std::fs::read_dir(account_state_snapshots_dir) + .map_err(|_| SyncError::AccountStateSnapshotsDirNotFound)? + .collect::, _>>() + .map_err(|_| SyncError::AccountStateSnapshotsDirNotFound)? 
+ .into_iter() + .map(|res| res.path()) + .collect(); + db.ingest_external_file(file_paths) + .map_err(|err| SyncError::RocksDBError(err.into_string()))?; + let iter = db.full_iterator(rocksdb::IteratorMode::Start); + trie_from_sorted_accounts_wrap( + trie.db(), + &mut iter + .map(|k| k.expect("We shouldn't have a rocksdb error here")) // TODO: remove unwrap + .inspect(|(k, v)| { + let account_state = AccountState::decode(v).expect("We should have accounts here"); + if account_state.storage_root != *EMPTY_TRIE_HASH { + storage_accounts + .accounts_with_storage_root + .insert(H256::from_slice(k), account_state.storage_root); + } + }) + .map(|(k, v)| (H256::from_slice(&k), v.to_vec())), + ) + .map_err(SyncError::TrieGenerationError) +} + +#[cfg(feature = "rocksdb")] +async fn insert_storage_into_rocksdb( + store: Store, + accounts_with_storage: BTreeSet, + account_state_snapshots_dir: &str, + temp_db_dir: &str, +) -> Result<(), SyncError> { + use ethrex_trie::trie_sorted::trie_from_sorted_accounts_wrap; + + let mut db_options = rocksdb::Options::default(); + db_options.create_if_missing(true); + let db = rocksdb::DB::open(&db_options, temp_db_dir) + .map_err(|_| SyncError::StorageTempDBDirNotFound)?; + let file_paths: Vec = std::fs::read_dir(account_state_snapshots_dir) + .map_err(|_| SyncError::AccountStoragesSnapshotsDirNotFound)? + .collect::, _>>() + .map_err(|_| SyncError::AccountStoragesSnapshotsDirNotFound)? 
+ .into_iter() + .map(|res| res.path()) + .collect(); + db.ingest_external_file(file_paths) + .map_err(|err| SyncError::RocksDBError(err.into_string()))?; + + for account_hash in accounts_with_storage { + let trie = store.open_storage_trie(account_hash, *EMPTY_TRIE_HASH)?; + let iter = db.prefix_iterator(account_hash.as_bytes()); + trie_from_sorted_accounts_wrap( + trie.db(), + &mut iter + .map(|k| k.expect("We shouldn't have a rocksdb error here")) // TODO: remove unwrap + .map(|(k, v)| (H256::from_slice(&k), v.to_vec())), + ) + .map_err(SyncError::TrieGenerationError)?; + } + Ok(()) +} diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index 690b4910f1d..9bbedda2359 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -65,6 +65,14 @@ pub fn get_account_state_snapshots_dir(datadir: &String) -> String { format!("{datadir}/account_state_snapshots") } +pub fn get_rocksdb_temp_accounts_dir(datadir: &String) -> String { + format!("{datadir}/temp_acc_dir") +} + +pub fn get_rocksdb_temp_storage_dir(datadir: &String) -> String { + format!("{datadir}/temp_storage_dir") +} + pub fn get_account_state_snapshot_file(directory: String, chunk_index: u64) -> String { format!("{directory}/account_state_chunk.rlp.{chunk_index}") } From 1dc3e745f2937abcbad2e5ed4f0c905dfc02b092 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 18 Sep 2025 18:27:45 -0300 Subject: [PATCH 012/115] dedup --- crates/networking/p2p/utils.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index 9bbedda2359..86779c85a10 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -87,6 +87,11 @@ pub fn dump_to_rocks_db( mut contents: Vec<(Vec, Vec)>, ) -> Result<(), rocksdb::Error> { contents.sort(); + contents.dedup_by_key(|(k, _)| { + let mut buf = [0u8; 64]; + buf[..k.len()].copy_from_slice(k); + buf + }); let writer_options = 
rocksdb::Options::default(); let mut writer = rocksdb::SstFileWriter::create(&writer_options); writer.open(std::path::Path::new(&path))?; From 500be72842639117c7deb46a120b695cd9390856 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 18 Sep 2025 18:28:46 -0300 Subject: [PATCH 013/115] Update Cargo.toml --- cmd/ethrex/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/ethrex/Cargo.toml b/cmd/ethrex/Cargo.toml index 02ca0802c17..49d621a41ce 100644 --- a/cmd/ethrex/Cargo.toml +++ b/cmd/ethrex/Cargo.toml @@ -61,7 +61,7 @@ path = "./lib.rs" [features] debug = ["ethrex-vm/debug"] -default = ["libmdbx", "c-kzg", "rollup_storage_sql", "dev", "metrics", "rocksdb"] +default = ["libmdbx", "c-kzg", "rollup_storage_sql", "dev", "metrics"] dev = ["dep:ethrex-dev"] c-kzg = [ "ethrex-vm/c-kzg", From 5a49a83552361e29024e63f8c1eec3a1cb2ef5e5 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 18 Sep 2025 18:36:50 -0300 Subject: [PATCH 014/115] Update sync.rs --- crates/networking/p2p/sync.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 5b6c40204e7..7107b5ad2cb 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1510,7 +1510,7 @@ async fn insert_storage_into_rocksdb( trie.db(), &mut iter .map(|k| k.expect("We shouldn't have a rocksdb error here")) // TODO: remove unwrap - .map(|(k, v)| (H256::from_slice(&k), v.to_vec())), + .map(|(k, v)| (H256::from_slice(&k[32..]), v.to_vec())), ) .map_err(SyncError::TrieGenerationError)?; } From 79b6795621759bde17f79643d69f219ca4306e9d Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 18 Sep 2025 19:14:03 -0300 Subject: [PATCH 015/115] par iter --- crates/networking/p2p/sync.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 7107b5ad2cb..1195680e54d 100644 --- 
a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1468,6 +1468,9 @@ async fn insert_accounts_into_rocksdb( &mut iter .map(|k| k.expect("We shouldn't have a rocksdb error here")) // TODO: remove unwrap .inspect(|(k, v)| { + METRICS + .account_tries_inserted + .fetch_add(1, Ordering::Relaxed); let account_state = AccountState::decode(v).expect("We should have accounts here"); if account_state.storage_root != *EMPTY_TRIE_HASH { storage_accounts @@ -1488,6 +1491,7 @@ async fn insert_storage_into_rocksdb( temp_db_dir: &str, ) -> Result<(), SyncError> { use ethrex_trie::trie_sorted::trie_from_sorted_accounts_wrap; + use rayon::iter::IntoParallelRefIterator; let mut db_options = rocksdb::Options::default(); db_options.create_if_missing(true); @@ -1503,8 +1507,8 @@ async fn insert_storage_into_rocksdb( db.ingest_external_file(file_paths) .map_err(|err| SyncError::RocksDBError(err.into_string()))?; - for account_hash in accounts_with_storage { - let trie = store.open_storage_trie(account_hash, *EMPTY_TRIE_HASH)?; + accounts_with_storage.par_iter().for_each(|account_hash| { + let trie = store.open_storage_trie(*account_hash, *EMPTY_TRIE_HASH); let iter = db.prefix_iterator(account_hash.as_bytes()); trie_from_sorted_accounts_wrap( trie.db(), @@ -1512,7 +1516,7 @@ async fn insert_storage_into_rocksdb( .map(|k| k.expect("We shouldn't have a rocksdb error here")) // TODO: remove unwrap .map(|(k, v)| (H256::from_slice(&k[32..]), v.to_vec())), ) - .map_err(SyncError::TrieGenerationError)?; - } + .map_err(SyncError::TrieGenerationError); + }); Ok(()) } From 278da8a4749b7f72c72a51dd87b37460e655e3b3 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 18 Sep 2025 19:18:58 -0300 Subject: [PATCH 016/115] Update sync.rs --- crates/networking/p2p/sync.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 1195680e54d..aad4701e941 100644 --- 
a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1508,7 +1508,9 @@ async fn insert_storage_into_rocksdb( .map_err(|err| SyncError::RocksDBError(err.into_string()))?; accounts_with_storage.par_iter().for_each(|account_hash| { - let trie = store.open_storage_trie(*account_hash, *EMPTY_TRIE_HASH); + let trie = store + .open_storage_trie(*account_hash, *EMPTY_TRIE_HASH) + .expect("Should be able to open trie"); let iter = db.prefix_iterator(account_hash.as_bytes()); trie_from_sorted_accounts_wrap( trie.db(), From 90f0752346052d1b0fd854835293393e78c1a2de Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Mon, 22 Sep 2025 10:38:41 -0300 Subject: [PATCH 017/115] Fixed compile --- crates/networking/p2p/peer_handler.rs | 8 ++++---- crates/networking/p2p/sync.rs | 14 +++++++------- crates/networking/p2p/utils.rs | 27 ++++++++++----------------- 3 files changed, 21 insertions(+), 28 deletions(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index bac66553085..9219a0d515c 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -798,7 +798,7 @@ impl PeerHandler { chunk_file, ); // TODO: check the error type and handle it properly - let result = dump_accounts_to_file(path, account_state_chunk); + let result = dump_accounts_to_file(&path, account_state_chunk); dump_account_result_sender_cloned .send(result) .await @@ -941,7 +941,7 @@ impl PeerHandler { } let path = get_account_state_snapshot_file(account_state_snapshots_dir, chunk_file); - dump_accounts_to_file(path, account_state_chunk) + dump_accounts_to_file(&path, account_state_chunk) .inspect_err(|err| { error!( "We had an error dumping the last accounts to disk {}", @@ -1368,7 +1368,7 @@ impl PeerHandler { &account_storages_snapshots_dir_cloned, chunk_index, ); - dump_storages_to_file(path, snapshot) + dump_storages_to_file(&path, snapshot) }); chunk_index += 1; @@ -1582,7 +1582,7 @@ impl PeerHandler { } 
let path = get_account_storages_snapshot_file(account_storages_snapshots_dir, chunk_index); - dump_storages_to_file(path, snapshot) + dump_storages_to_file(&path, snapshot) .map_err(|_| PeerHandlerError::WriteStorageSnapshotsDir(chunk_index))?; } disk_joinset diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 5a3e5e1ae2c..66e5a4bd474 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -27,7 +27,7 @@ use ethrex_trie::trie_sorted::TrieGenerationError; use ethrex_trie::{NodeHash, Trie, TrieError}; use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator}; use std::collections::{BTreeMap, BTreeSet, HashSet}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::time::SystemTime; use std::{ array, @@ -1432,7 +1432,7 @@ pub async fn validate_bytecodes(store: Store, state_root: H256) -> bool { async fn insert_accounts_into_db( store: Store, storage_accounts: &mut AccountStorageRoots, - account_state_snapshots_dir: &str, + account_state_snapshots_dir: &Path, ) -> Result { let mut computed_state_root = *EMPTY_TRIE_HASH; for entry in std::fs::read_dir(account_state_snapshots_dir) @@ -1478,7 +1478,7 @@ async fn insert_accounts_into_db( async fn insert_storages_into_db( store: Store, - account_storages_snapshots_dir: &str, + account_storages_snapshots_dir: &Path, maybe_big_account_storage_state_roots: &Arc>>, pivot_header: &BlockHeader, ) -> Result<(), SyncError> { @@ -1537,8 +1537,8 @@ async fn insert_storages_into_db( async fn insert_accounts_into_rocksdb( store: Store, storage_accounts: &mut AccountStorageRoots, - account_state_snapshots_dir: &str, - temp_db_dir: &str, + account_state_snapshots_dir: &Path, + temp_db_dir: &Path, ) -> Result { use ethrex_trie::trie_sorted::trie_from_sorted_accounts_wrap; @@ -1581,8 +1581,8 @@ async fn insert_accounts_into_rocksdb( async fn insert_storage_into_rocksdb( store: Store, accounts_with_storage: BTreeSet, - account_state_snapshots_dir: &str, - 
temp_db_dir: &str, + account_state_snapshots_dir: &Path, + temp_db_dir: &Path, ) -> Result<(), SyncError> { use ethrex_trie::trie_sorted::trie_from_sorted_accounts_wrap; use rayon::iter::IntoParallelRefIterator; diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index 1bb53d3dab2..48fcc34befa 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -66,16 +66,14 @@ pub fn get_account_state_snapshots_dir(datadir: &Path) -> PathBuf { datadir.join("account_state_snapshots") } -pub fn get_rocksdb_temp_accounts_dir(datadir: &String) -> String { - format!("{datadir}/temp_acc_dir") +pub fn get_rocksdb_temp_accounts_dir(datadir: &Path) -> PathBuf { + datadir.join("temp_acc_dir") } -pub fn get_rocksdb_temp_storage_dir(datadir: &String) -> String { - format!("{datadir}/temp_storage_dir") +pub fn get_rocksdb_temp_storage_dir(datadir: &Path) -> PathBuf { + datadir.join("temp_storage_dir") } -pub fn get_account_state_snapshot_file(directory: String, chunk_index: u64) -> String { - format!("{directory}/account_state_chunk.rlp.{chunk_index}") pub fn get_account_state_snapshot_file(directory: &Path, chunk_index: u64) -> PathBuf { directory.join(format!("account_state_chunk.rlp.{chunk_index}")) } @@ -86,7 +84,7 @@ pub fn get_account_storages_snapshot_file(directory: &Path, chunk_index: u64) -> #[cfg(feature = "rocksdb")] pub fn dump_to_rocks_db( - path: String, + path: &Path, mut contents: Vec<(Vec, Vec)>, ) -> Result<(), rocksdb::Error> { contents.sort(); @@ -104,11 +102,6 @@ pub fn dump_to_rocks_db( writer.finish() } -pub fn dump_to_file(path: String, contents: Vec) -> Result<(), DumpError> { - std::fs::write(&path, &contents) - .inspect_err(|err| { - tracing::error!("Failed to write snapshot to path {}. 
Error: {}", &path, err) - }) pub fn get_code_hashes_snapshots_dir(datadir: &Path) -> PathBuf { datadir.join("bytecode_hashes_snapshots") } @@ -128,7 +121,7 @@ pub fn dump_to_file(path: &Path, contents: Vec) -> Result<(), DumpError> { } pub fn dump_accounts_to_file( - path: String, + path: &Path, accounts: Vec<(H256, AccountState)>, ) -> Result<(), DumpError> { cfg_if::cfg_if! { @@ -142,7 +135,7 @@ pub fn dump_accounts_to_file( ) .inspect_err(|err| error!("Rocksdb writing stt error {err:?}")) .map_err(|_| DumpError { - path, + path: path.to_path_buf(), contents: Vec::new(), error: std::io::ErrorKind::Other, }) @@ -153,13 +146,13 @@ pub fn dump_accounts_to_file( } pub fn dump_storages_to_file( - path: String, + path: &Path, storages: Vec<(H256, Vec<(H256, U256)>)>, ) -> Result<(), DumpError> { cfg_if::cfg_if! { if #[cfg(feature = "rocksdb")] { dump_to_rocks_db( - path.clone(), + &path, storages .into_iter() .flat_map(|(hash, slots)| { @@ -171,7 +164,7 @@ pub fn dump_storages_to_file( ) .inspect_err(|err| error!("Rocksdb writing stt error {err:?}")) .map_err(|_| DumpError { - path, + path: path.to_path_buf(), contents: Vec::new(), error: std::io::ErrorKind::Other, }) From 0a38b2000f558d02753cbba6a61e14b1d36e4d2e Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 23 Sep 2025 14:34:47 -0300 Subject: [PATCH 018/115] added logging for error --- crates/networking/p2p/sync.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 66e5a4bd474..89388d5b5a8 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1606,12 +1606,17 @@ async fn insert_storage_into_rocksdb( .open_storage_trie(*account_hash, *EMPTY_TRIE_HASH) .expect("Should be able to open trie"); let iter = db.prefix_iterator(account_hash.as_bytes()); - trie_from_sorted_accounts_wrap( + let _ = trie_from_sorted_accounts_wrap( trie.db(), &mut iter .map(|k| k.expect("We shouldn't have a 
rocksdb error here")) // TODO: remove unwrap .map(|(k, v)| (H256::from_slice(&k[32..]), v.to_vec())), ) + .inspect_err(|err: &TrieGenerationError| { + error!( + "we found an error while inserting the storage trie for the account {account_hash}, err {err}" + ) + }) .map_err(SyncError::TrieGenerationError); }); Ok(()) From 85cd81742b5a101009ce2b362c9747106bcf3ae3 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 23 Sep 2025 14:51:15 -0300 Subject: [PATCH 019/115] Fixed edge case and pushed fix --- crates/common/trie/trie_sorted.rs | 32 +++++++++++++++++++++++++++---- crates/networking/p2p/sync.rs | 2 +- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index 091dec12472..3861eafd3f7 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -138,7 +138,7 @@ fn flush_nodes_to_write( #[inline(never)] pub fn trie_from_sorted_accounts<'scope, T>( db: &'scope dyn TrieDB, - accounts_iter: &mut T, + data_iter: &mut T, scoped_thread: &'scope Scope<'scope, '_>, ) -> Result where @@ -149,8 +149,20 @@ where let mut write_threads = Vec::new(); let mut left_side = StackElement::default(); - let mut center_side: CenterSide = CenterSide::from_value(accounts_iter.next().unwrap()); - let mut right_side_opt: Option<(H256, Vec)> = accounts_iter.next(); + let initial_value = data_iter.next().unwrap(); + let mut center_side: CenterSide = CenterSide::from_value(initial_value.clone()); + let mut right_side_opt: Option<(H256, Vec)> = data_iter.next(); + + // Edge Case + if right_side_opt.is_none() { + let node = LeafNode { + partial: center_side.path, + value: initial_value.1, + }; + let hash = node.compute_hash().finalize(); + flush_nodes_to_write(vec![node.into()], db)?; + return Ok(hash); + } while let Some(right_side) = right_side_opt { if nodes_to_write.len() as u64 > SIZE_TO_WRITE_DB { @@ -188,7 +200,7 @@ where left_side = element; } center_side = 
CenterSide::from_value(right_side); - right_side_opt = accounts_iter.next(); + right_side_opt = data_iter.next(); } while !is_child(¢er_side.path, &left_side) { @@ -328,6 +340,13 @@ mod test { accounts } + fn generate_input_4() -> BTreeMap> { + let mut accounts: BTreeMap> = BTreeMap::new(); + let string = "0532f23d3bd5277790ece5a6cb6fc684bc473a91ffe3a0334049527c4f6987e9"; + accounts.insert(H256::from_str(string).unwrap(), vec![0, 1]); + accounts + } + fn generate_input_slots_1() -> BTreeMap { let mut slots: BTreeMap = BTreeMap::new(); for string in [ @@ -399,6 +418,11 @@ mod test { run_test_account_state(generate_input_3()); } + #[test] + fn test_4() { + run_test_account_state(generate_input_4()); + } + #[test] fn test_slots_1() { run_test_storage_slots(generate_input_slots_1()); diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 89388d5b5a8..a79d47c053c 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1614,7 +1614,7 @@ async fn insert_storage_into_rocksdb( ) .inspect_err(|err: &TrieGenerationError| { error!( - "we found an error while inserting the storage trie for the account {account_hash}, err {err}" + "we found an error while inserting the storage trie for the account {account_hash:x}, err {err}" ) }) .map_err(SyncError::TrieGenerationError); From 211a6b666b4e372a2b0eb7b73e4e352468b8a58c Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 23 Sep 2025 15:23:45 -0300 Subject: [PATCH 020/115] print the account with an error --- crates/networking/p2p/sync.rs | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index a79d47c053c..b70685caeab 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1601,23 +1601,30 @@ async fn insert_storage_into_rocksdb( db.ingest_external_file(file_paths) .map_err(|err| SyncError::RocksDBError(err.into_string()))?; - 
accounts_with_storage.par_iter().for_each(|account_hash| { + for account_hash in accounts_with_storage { let trie = store - .open_storage_trie(*account_hash, *EMPTY_TRIE_HASH) + .open_storage_trie(account_hash, *EMPTY_TRIE_HASH) .expect("Should be able to open trie"); let iter = db.prefix_iterator(account_hash.as_bytes()); - let _ = trie_from_sorted_accounts_wrap( + let result = trie_from_sorted_accounts_wrap( trie.db(), &mut iter .map(|k| k.expect("We shouldn't have a rocksdb error here")) // TODO: remove unwrap .map(|(k, v)| (H256::from_slice(&k[32..]), v.to_vec())), ) .inspect_err(|err: &TrieGenerationError| { + let iter = db.prefix_iterator(account_hash.as_bytes()); + let mut count = 0 as usize; + for element in iter { + let element_unwrap = element.unwrap(); + println!("{element_unwrap:?}"); + count += 1; + } error!( - "we found an error while inserting the storage trie for the account {account_hash:x}, err {err}" - ) + "we found an error while inserting the storage trie for the account {account_hash:x}, err {err}, count {count}" + ); }) - .map_err(SyncError::TrieGenerationError); - }); + .map_err(SyncError::TrieGenerationError)?; + } Ok(()) } From 141019c99754ebb0635a9a4b18aeaa4d61f87486 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 23 Sep 2025 15:31:31 -0300 Subject: [PATCH 021/115] what's going on --- crates/networking/p2p/sync.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index b70685caeab..bf183464a55 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1614,12 +1614,7 @@ async fn insert_storage_into_rocksdb( ) .inspect_err(|err: &TrieGenerationError| { let iter = db.prefix_iterator(account_hash.as_bytes()); - let mut count = 0 as usize; - for element in iter { - let element_unwrap = element.unwrap(); - println!("{element_unwrap:?}"); - count += 1; - } + let mut count = iter.count(); error!( "we found an error while 
inserting the storage trie for the account {account_hash:x}, err {err}, count {count}" ); From 6fa1883eb0507d4dadc094931fcae827da4f094a Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 23 Sep 2025 15:53:00 -0300 Subject: [PATCH 022/115] Fix attempt --- crates/networking/p2p/sync.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index bf183464a55..d0a56bcf2e5 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1610,11 +1610,11 @@ async fn insert_storage_into_rocksdb( trie.db(), &mut iter .map(|k| k.expect("We shouldn't have a rocksdb error here")) // TODO: remove unwrap - .map(|(k, v)| (H256::from_slice(&k[32..]), v.to_vec())), + .map(|(k, v)| (H256::from_slice(&k[32..]), v.to_vec())) + .skip_while(|(hash, _)| *hash != account_hash) + .take_while(|(hash, _)| *hash == account_hash), ) .inspect_err(|err: &TrieGenerationError| { - let iter = db.prefix_iterator(account_hash.as_bytes()); - let mut count = iter.count(); error!( "we found an error while inserting the storage trie for the account {account_hash:x}, err {err}, count {count}" ); From 20b1a7ccd8a0053436c634520277827091631236 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 23 Sep 2025 15:54:09 -0300 Subject: [PATCH 023/115] Update sync.rs --- crates/networking/p2p/sync.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index d0a56bcf2e5..67b113c2da4 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1616,7 +1616,7 @@ async fn insert_storage_into_rocksdb( ) .inspect_err(|err: &TrieGenerationError| { error!( - "we found an error while inserting the storage trie for the account {account_hash:x}, err {err}, count {count}" + "we found an error while inserting the storage trie for the account {account_hash:x}, err {err}" ); }) 
.map_err(SyncError::TrieGenerationError)?; From 01269197315d945f2dec07ef9fb38a6c6efe1efd Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 23 Sep 2025 16:03:33 -0300 Subject: [PATCH 024/115] Update trie_sorted.rs --- crates/common/trie/trie_sorted.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index 3861eafd3f7..53988a5ac8b 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -1,7 +1,7 @@ use std::thread::{Scope, scope}; use crate::{ - Nibbles, Node, TrieDB, TrieError, + EMPTY_TRIE_HASH, Nibbles, Node, TrieDB, TrieError, node::{BranchNode, ExtensionNode, LeafNode}, }; use ethereum_types::H256; @@ -149,7 +149,9 @@ where let mut write_threads = Vec::new(); let mut left_side = StackElement::default(); - let initial_value = data_iter.next().unwrap(); + let Some(initial_value) = data_iter.next() else { + return Ok(*EMPTY_TRIE_HASH); + }; let mut center_side: CenterSide = CenterSide::from_value(initial_value.clone()); let mut right_side_opt: Option<(H256, Vec)> = data_iter.next(); From 33f750a87ade738a821e8c845fd9a53415079c9e Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 23 Sep 2025 16:24:27 -0300 Subject: [PATCH 025/115] Added inc --- crates/networking/p2p/network.rs | 3 ++- crates/networking/p2p/sync.rs | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/networking/p2p/network.rs b/crates/networking/p2p/network.rs index dd7cd5a9d99..d4568859780 100644 --- a/crates/networking/p2p/network.rs +++ b/crates/networking/p2p/network.rs @@ -261,6 +261,7 @@ pub async fn periodically_show_peer_stats_during_syncing( // Storage leaves metrics let storage_leaves_downloaded = METRICS.downloaded_storage_slots.load(Ordering::Relaxed); + let storage_accounts_inserted = METRICS.storage_tries_state_roots_computed.get(); let storage_accounts = METRICS.storage_accounts_initial.load(Ordering::Relaxed); 
let storage_accounts_healed = METRICS.storage_accounts_healed.load(Ordering::Relaxed); let storage_leaves_time = format_duration({ @@ -364,7 +365,7 @@ headers progress: {headers_download_progress} (total: {headers_to_download}, dow account leaves download: {account_leaves_downloaded}, elapsed: {account_leaves_time} account leaves insertion: {account_leaves_inserted_percentage:.2}%, elapsed: {account_leaves_inserted_time} storage leaves download: {storage_leaves_downloaded}, elapsed: {storage_leaves_time}, initially accounts with storage {storage_accounts}, healed accounts {storage_accounts_healed} -storage leaves insertion: {storage_leaves_inserted_time} +storage leaves insertion: {storage_accounts_inserted}, {storage_leaves_inserted_time} healing: global accounts healed {healed_accounts} global storage slots healed {healed_storages}, elapsed: {heal_time}, current throttle {heal_current_throttle} bytecodes progress: downloaded: {bytecodes_downloaded}, elapsed: {bytecodes_download_time})" ); diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 67b113c2da4..1dd1dea050e 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1620,6 +1620,7 @@ async fn insert_storage_into_rocksdb( ); }) .map_err(SyncError::TrieGenerationError)?; + METRICS.storage_tries_state_roots_computed.inc(); } Ok(()) } From 34eebd1ea0cf126c7e7718f12ff2a12f3b0bc431 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 23 Sep 2025 17:14:58 -0300 Subject: [PATCH 026/115] rocksdb iterator --- crates/networking/p2p/sync.rs | 51 ++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 1dd1dea050e..c9130d4d54d 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1586,6 +1586,41 @@ async fn insert_storage_into_rocksdb( ) -> Result<(), SyncError> { use 
ethrex_trie::trie_sorted::trie_from_sorted_accounts_wrap; use rayon::iter::IntoParallelRefIterator; + use rocksdb::DBCommon; + + struct RocksDBIterator<'a> { + iter: rocksdb::DBRawIterator<'a>, + limit: H256, + } + + impl<'a> Iterator for RocksDBIterator<'a> { + type Item = (H256, Vec); + + fn next(&mut self) -> Option { + if !self.iter.valid() { + return None; + } + let return_value = { + let key = self.iter.key(); + let value = self.iter.value(); + match (key, value) { + (Some(key), Some(value)) => { + let hash = H256::from_slice(&key[0..32]); + let key = H256::from_slice(&key[32..]); + let value = value.to_vec(); + if hash != self.limit { + None + } else { + Some((key, value)) + } + } + _ => None, + } + }; + self.iter.next(); + return_value + } + } let mut db_options = rocksdb::Options::default(); db_options.create_if_missing(true); @@ -1605,14 +1640,18 @@ async fn insert_storage_into_rocksdb( let trie = store .open_storage_trie(account_hash, *EMPTY_TRIE_HASH) .expect("Should be able to open trie"); - let iter = db.prefix_iterator(account_hash.as_bytes()); + let mut iter = db.raw_iterator(); + let mut initial_key = account_hash.as_bytes().to_vec(); + initial_key.extend([0_u8; 32]); + iter.seek(initial_key); + let mut iter = RocksDBIterator { + iter, + limit: account_hash, + }; + let result = trie_from_sorted_accounts_wrap( trie.db(), - &mut iter - .map(|k| k.expect("We shouldn't have a rocksdb error here")) // TODO: remove unwrap - .map(|(k, v)| (H256::from_slice(&k[32..]), v.to_vec())) - .skip_while(|(hash, _)| *hash != account_hash) - .take_while(|(hash, _)| *hash == account_hash), + &mut iter, ) .inspect_err(|err: &TrieGenerationError| { error!( From 80557adb8ae2f9f00e5eba1fb77d031dd806d76f Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 23 Sep 2025 18:32:34 -0300 Subject: [PATCH 027/115] fixed bytecodes --- crates/networking/p2p/sync.rs | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git 
a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index c9130d4d54d..730f47015b1 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -854,7 +854,8 @@ impl Syncer { std::fs::create_dir_all(&code_hashes_snapshot_dir).map_err(|_| SyncError::CorruptPath)?; // Create collector to store code hashes in files - let mut code_hash_collector = CodeHashCollector::new(code_hashes_snapshot_dir.clone()); + let mut code_hash_collector: CodeHashCollector = + CodeHashCollector::new(code_hashes_snapshot_dir.clone()); let mut storage_accounts = AccountStorageRoots::default(); if !std::env::var("SKIP_START_SNAP_SYNC").is_ok_and(|var| !var.is_empty()) { @@ -884,7 +885,8 @@ impl Syncer { store.clone(), &mut storage_accounts, &account_state_snapshots_dir, - &get_rocksdb_temp_accounts_dir(&self.datadir) + &get_rocksdb_temp_accounts_dir(&self.datadir), + &mut code_hash_collector, ).await?; let accounts_with_storage: BTreeSet = BTreeSet::from_iter(storage_accounts.accounts_with_storage_root.keys().into_iter().map(|k| *k)); } else { @@ -892,6 +894,7 @@ impl Syncer { store.clone(), &mut storage_accounts, &account_state_snapshots_dir, + &mut code_hash_collector, ).await?; } } @@ -1433,6 +1436,7 @@ async fn insert_accounts_into_db( store: Store, storage_accounts: &mut AccountStorageRoots, account_state_snapshots_dir: &Path, + code_hash_collector: &mut CodeHashCollector, ) -> Result { let mut computed_state_root = *EMPTY_TRIE_HASH; for entry in std::fs::read_dir(account_state_snapshots_dir) @@ -1454,6 +1458,17 @@ async fn insert_accounts_into_db( }), ); + // Collect valid code hashes from current account snapshot + let code_hashes_from_snapshot: Vec = account_states_snapshot + .iter() + .filter_map(|(_, state)| { + (state.code_hash != *EMPTY_KECCACK_HASH).then_some(state.code_hash) + }) + .collect(); + + code_hash_collector.extend(code_hashes_from_snapshot); + code_hash_collector.flush_if_needed().await?; + info!("Inserting accounts into the state 
trie"); let store_clone = store.clone(); @@ -1539,6 +1554,7 @@ async fn insert_accounts_into_rocksdb( storage_accounts: &mut AccountStorageRoots, account_state_snapshots_dir: &Path, temp_db_dir: &Path, + code_hash_collector: &mut CodeHashCollector, ) -> Result { use ethrex_trie::trie_sorted::trie_from_sorted_accounts_wrap; @@ -1571,6 +1587,10 @@ async fn insert_accounts_into_rocksdb( .accounts_with_storage_root .insert(H256::from_slice(k), account_state.storage_root); } + if account_state.code_hash != *EMPTY_KECCACK_HASH { + code_hash_collector.add(account_state.code_hash); + code_hash_collector.flush_if_needed(); + } }) .map(|(k, v)| (H256::from_slice(&k), v.to_vec())), ) From dc8d0ec3126697806eb73820cc92c97f9a90cf2a Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 23 Sep 2025 19:06:54 -0300 Subject: [PATCH 028/115] returned par iter --- crates/networking/p2p/sync.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 730f47015b1..25256ea42b9 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1656,7 +1656,7 @@ async fn insert_storage_into_rocksdb( db.ingest_external_file(file_paths) .map_err(|err| SyncError::RocksDBError(err.into_string()))?; - for account_hash in accounts_with_storage { + accounts_with_storage.into_par_iter().for_each(|account_hash| { let trie = store .open_storage_trie(account_hash, *EMPTY_TRIE_HASH) .expect("Should be able to open trie"); @@ -1678,8 +1678,8 @@ async fn insert_storage_into_rocksdb( "we found an error while inserting the storage trie for the account {account_hash:x}, err {err}" ); }) - .map_err(SyncError::TrieGenerationError)?; + .map_err(SyncError::TrieGenerationError); METRICS.storage_tries_state_roots_computed.inc(); - } + }); Ok(()) } From 4298c21eb116734523ae0acca30db4e4d848c017 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 24 Sep 2025 11:02:44 -0300 Subject: [PATCH 
029/115] added logging --- crates/networking/p2p/sync.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 25256ea42b9..cc25fad2e40 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1657,9 +1657,11 @@ async fn insert_storage_into_rocksdb( .map_err(|err| SyncError::RocksDBError(err.into_string()))?; accounts_with_storage.into_par_iter().for_each(|account_hash| { + info!("Opening Trie"); let trie = store .open_storage_trie(account_hash, *EMPTY_TRIE_HASH) .expect("Should be able to open trie"); + info!("Creating Iter Trie"); let mut iter = db.raw_iterator(); let mut initial_key = account_hash.as_bytes().to_vec(); initial_key.extend([0_u8; 32]); @@ -1669,6 +1671,7 @@ async fn insert_storage_into_rocksdb( limit: account_hash, }; + info!("Starting Sorted"); let result = trie_from_sorted_accounts_wrap( trie.db(), &mut iter, @@ -1679,6 +1682,7 @@ async fn insert_storage_into_rocksdb( ); }) .map_err(SyncError::TrieGenerationError); + info!("Increasing Counter"); METRICS.storage_tries_state_roots_computed.inc(); }); Ok(()) From 74f2e5a7f65c864fd29b1c45c3888d3eee611942 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 24 Sep 2025 11:23:46 -0300 Subject: [PATCH 030/115] Scoped thread --- crates/networking/p2p/sync.rs | 59 +++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index cc25fad2e40..9abcc26391b 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1656,34 +1656,39 @@ async fn insert_storage_into_rocksdb( db.ingest_external_file(file_paths) .map_err(|err| SyncError::RocksDBError(err.into_string()))?; - accounts_with_storage.into_par_iter().for_each(|account_hash| { - info!("Opening Trie"); - let trie = store - .open_storage_trie(account_hash, *EMPTY_TRIE_HASH) - .expect("Should be able to open trie"); - 
info!("Creating Iter Trie"); - let mut iter = db.raw_iterator(); - let mut initial_key = account_hash.as_bytes().to_vec(); - initial_key.extend([0_u8; 32]); - iter.seek(initial_key); - let mut iter = RocksDBIterator { - iter, - limit: account_hash, - }; + std::thread::scope(|s| { + for account_hash in accounts_with_storage { + let store_clone = store.clone(); + let mut iter = db.raw_iterator(); + s.spawn(move || { + info!("Opening Trie"); + let trie = store_clone + .open_storage_trie(account_hash, *EMPTY_TRIE_HASH) + .expect("Should be able to open trie"); + info!("Creating Iter Trie"); + let mut initial_key = account_hash.as_bytes().to_vec(); + initial_key.extend([0_u8; 32]); + iter.seek(initial_key); + let mut iter = RocksDBIterator { + iter, + limit: account_hash, + }; - info!("Starting Sorted"); - let result = trie_from_sorted_accounts_wrap( - trie.db(), - &mut iter, - ) - .inspect_err(|err: &TrieGenerationError| { - error!( - "we found an error while inserting the storage trie for the account {account_hash:x}, err {err}" - ); - }) - .map_err(SyncError::TrieGenerationError); - info!("Increasing Counter"); - METRICS.storage_tries_state_roots_computed.inc(); + info!("Starting Sorted"); + let result = trie_from_sorted_accounts_wrap( + trie.db(), + &mut iter, + ) + .inspect_err(|err: &TrieGenerationError| { + error!( + "we found an error while inserting the storage trie for the account {account_hash:x}, err {err}" + ); + }) + .map_err(SyncError::TrieGenerationError); + info!("Increasing Counter"); + METRICS.storage_tries_state_roots_computed.inc(); + }); + } }); Ok(()) } From 46bb452032d47c06b6bcee88687b9df4b8613619 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 24 Sep 2025 11:46:16 -0300 Subject: [PATCH 031/115] Removed logging --- crates/networking/p2p/sync.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 9abcc26391b..8f1fcf8e540 100644 --- a/crates/networking/p2p/sync.rs 
+++ b/crates/networking/p2p/sync.rs @@ -1661,11 +1661,9 @@ async fn insert_storage_into_rocksdb( let store_clone = store.clone(); let mut iter = db.raw_iterator(); s.spawn(move || { - info!("Opening Trie"); let trie = store_clone .open_storage_trie(account_hash, *EMPTY_TRIE_HASH) .expect("Should be able to open trie"); - info!("Creating Iter Trie"); let mut initial_key = account_hash.as_bytes().to_vec(); initial_key.extend([0_u8; 32]); iter.seek(initial_key); @@ -1674,7 +1672,6 @@ async fn insert_storage_into_rocksdb( limit: account_hash, }; - info!("Starting Sorted"); let result = trie_from_sorted_accounts_wrap( trie.db(), &mut iter, @@ -1685,7 +1682,6 @@ async fn insert_storage_into_rocksdb( ); }) .map_err(SyncError::TrieGenerationError); - info!("Increasing Counter"); METRICS.storage_tries_state_roots_computed.inc(); }); } From 5a5d9a00fe3cbc0096bbdd5929a6c84436b5bd44 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 24 Sep 2025 12:09:08 -0300 Subject: [PATCH 032/115] Don't spawn that many threads --- crates/networking/p2p/sync.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 8f1fcf8e540..746f5952b05 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1657,10 +1657,11 @@ async fn insert_storage_into_rocksdb( .map_err(|err| SyncError::RocksDBError(err.into_string()))?; std::thread::scope(|s| { + let mut joinset = Vec::new(); for account_hash in accounts_with_storage { let store_clone = store.clone(); let mut iter = db.raw_iterator(); - s.spawn(move || { + let handle = s.spawn(move || { let trie = store_clone .open_storage_trie(account_hash, *EMPTY_TRIE_HASH) .expect("Should be able to open trie"); @@ -1683,8 +1684,15 @@ async fn insert_storage_into_rocksdb( }) .map_err(SyncError::TrieGenerationError); METRICS.storage_tries_state_roots_computed.inc(); + result }); + joinset.push(handle); + joinset + .extract_if(.., 
|handle| handle.is_finished()) + .map(|handle| handle.join()) + .collect::, _>>() + .map_err(|_| SyncError::NotInSnapSync)?; //this needs to be a no new thread } - }); - Ok(()) + Ok(()) + }) } From 0e530465245c84cf637a496991d231a6a6796c48 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 24 Sep 2025 12:39:59 -0300 Subject: [PATCH 033/115] Improved error handling --- crates/networking/p2p/sync.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 746f5952b05..acb29b3e5e7 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1323,6 +1323,8 @@ pub enum SyncError { NoPeers, #[error("Failed to get block headers")] NoBlockHeaders, + #[error("Couldn't create a thread")] + ThreadCreationError, #[error("Called update_pivot outside snapsync mode")] NotInSnapSync, #[error("Peer handler error: {0}")] @@ -1690,8 +1692,8 @@ async fn insert_storage_into_rocksdb( joinset .extract_if(.., |handle| handle.is_finished()) .map(|handle| handle.join()) - .collect::, _>>() - .map_err(|_| SyncError::NotInSnapSync)?; //this needs to be a no new thread + .collect::, _>, _>>() + .map_err(|_| SyncError::ThreadCreationError)??; //this needs to be a no new thread } Ok(()) }) From 8add39d13ff48c0015efd6a2c9a4fa297cd5b09d Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 24 Sep 2025 14:24:37 -0300 Subject: [PATCH 034/115] Fixed race condition --- crates/networking/p2p/peer_handler.rs | 51 ++++++--------------------- 1 file changed, 11 insertions(+), 40 deletions(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index 9219a0d515c..e83c56ee5c2 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -36,7 +36,7 @@ use crate::{ }, }, snap::encodable_to_proof, - sync::{AccountStorageRoots, BlockSyncState, block_is_stale, update_pivot}, + sync::{AccountStorageRoots, 
BlockSyncState, SyncError, block_is_stale, update_pivot}, utils::{ SendMessageError, dump_accounts_to_file, dump_storages_to_file, dump_to_file, get_account_state_snapshot_file, get_account_storages_snapshot_file, @@ -761,10 +761,6 @@ impl PeerHandler { let (task_sender, mut task_receiver) = tokio::sync::mpsc::channel::<(Vec, H256, Option<(H256, H256)>)>(1000); - // channel to send the result of dumping accounts - let (dump_account_result_sender, mut dump_account_result_receiver) = - tokio::sync::mpsc::channel::>(1000); - info!("Starting to download account ranges from peers"); *METRICS.account_tries_download_start_time.lock().await = Some(SystemTime::now()); @@ -772,6 +768,7 @@ impl PeerHandler { let mut completed_tasks = 0; let mut chunk_file = 0; let mut last_update: SystemTime = SystemTime::now(); + let mut write_set = tokio::task::JoinSet::new(); loop { if all_accounts_state.len() * size_of::() >= RANGE_FILE_CHUNK_SIZE { @@ -791,22 +788,13 @@ impl PeerHandler { } let account_state_snapshots_dir_cloned = account_state_snapshots_dir.to_path_buf(); - let dump_account_result_sender_cloned = dump_account_result_sender.clone(); - tokio::task::spawn(async move { + write_set.spawn(async move { let path = get_account_state_snapshot_file( &account_state_snapshots_dir_cloned, chunk_file, ); // TODO: check the error type and handle it properly - let result = dump_accounts_to_file(&path, account_state_chunk); - dump_account_result_sender_cloned - .send(result) - .await - .inspect_err(|err| { - error!( - "Failed to send account dump result through channel. 
Error: {err}" - ) - }) + dump_accounts_to_file(&path, account_state_chunk) }); chunk_file += 1; @@ -857,30 +845,6 @@ impl PeerHandler { ); } - // Check if any dump account task finished - // TODO: consider tracking in-flight (dump) tasks - if let Ok(Err(dump_account_data)) = dump_account_result_receiver.try_recv() { - if dump_account_data.error == ErrorKind::StorageFull { - return Err(PeerHandlerError::StorageFull); - } - // If the dumping failed, retry it - let dump_account_result_sender_cloned = dump_account_result_sender.clone(); - tokio::task::spawn(async move { - let DumpError { path, contents, .. } = dump_account_data; - // Dump the account data - let result = dump_to_file(&path, contents); - // Send the result through the channel - dump_account_result_sender_cloned - .send(result) - .await - .inspect_err(|err| { - error!( - "Failed to send account dump result through channel. Error: {err}" - ) - }) - }); - } - let Some((peer_id, peer_channel)) = self .peer_table .get_peer_channel_with_highest_score_and_mark_as_used(&SUPPORTED_SNAP_CAPABILITIES) @@ -923,6 +887,13 @@ impl PeerHandler { )); } + write_set + .join_all() + .await + .into_iter() + .collect::, DumpError>>() + .map_err(PeerHandlerError::DumpError)?; + // TODO: This is repeated code, consider refactoring { let current_account_hashes = std::mem::take(&mut all_account_hashes); From edc2f91e7743b1b0dd6951060670471d99fe62fd Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Wed, 24 Sep 2025 15:29:17 -0300 Subject: [PATCH 035/115] perf(l1): use BTree for tracking storages during snap sync --- crates/networking/p2p/peer_handler.rs | 69 ++++++++++++++------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index efaacf9f580..ac6e7d6ce86 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -1,5 +1,5 @@ use std::{ - collections::{HashSet, VecDeque}, + 
collections::{BTreeMap, HashSet, VecDeque}, io::ErrorKind, path::{Path, PathBuf}, sync::atomic::Ordering, @@ -1302,10 +1302,11 @@ impl PeerHandler { }); } - // 2) request the chunks from peers - let mut all_account_storages = - vec![vec![]; account_storage_roots.accounts_with_storage_root.len()]; - + let all_account_hashes: Vec = account_storage_roots + .accounts_with_storage_root + .iter() + .map(|(addr, _)| *addr) + .collect(); // channel to send the tasks to the peers let (task_sender, mut task_receiver) = tokio::sync::mpsc::channel::(1000); @@ -1319,24 +1320,23 @@ impl PeerHandler { // TODO: in a refactor, delete this replace with a structure that can handle removes let mut accounts_done: Vec = Vec::new(); - let current_account_hashes = account_storage_roots - .accounts_with_storage_root - .iter() - .map(|a| *a.0) - .collect::>(); + // Maps hashed address to storage root and vector of hashed storage keys and keys + let mut current_account_storages: BTreeMap)> = + BTreeMap::new(); debug!("Starting request_storage_ranges loop"); loop { - if all_account_storages.iter().map(Vec::len).sum::() * 64 > RANGE_FILE_CHUNK_SIZE + if current_account_storages + .values() + .map(|(_, storages)| storages.len()) + .sum::() + * 64 + > RANGE_FILE_CHUNK_SIZE { - let current_account_storages = std::mem::take(&mut all_account_storages); - all_account_storages = - vec![vec![]; account_storage_roots.accounts_with_storage_root.len()]; - - let snapshot = current_account_hashes - .clone() + let current_account_storages = std::mem::take(&mut current_account_storages); + let snapshot = current_account_storages .into_iter() - .zip(current_account_storages) + .map(|(hashed_address, (_, storages))| (hashed_address, storages)) .collect::>() .encode_to_vec(); @@ -1384,7 +1384,7 @@ impl PeerHandler { self.peer_table.free_peer(peer_id).await; - for account in ¤t_account_hashes[start_index..remaining_start] { + for account in &all_account_hashes[start_index..remaining_start] { 
accounts_done.push(*account); } @@ -1411,10 +1411,10 @@ impl PeerHandler { }; tasks_queue_not_started.push_back(task); task_count += 1; - accounts_done.push(current_account_hashes[remaining_start]); + accounts_done.push(all_account_hashes[remaining_start]); account_storage_roots .healed_accounts - .insert(current_account_hashes[start_index]); + .insert(all_account_hashes[start_index]); } } else { if remaining_start + 1 < remaining_end { @@ -1498,11 +1498,23 @@ impl PeerHandler { tasks_queue_not_started.len() ); if account_storages.len() == 1 { + let address = all_account_hashes[start_index]; // We downloaded a big storage account - all_account_storages[start_index].extend(account_storages.remove(0)); + current_account_storages + .entry(address) + .or_insert_with(|| { + ( + account_storage_roots.accounts_with_storage_root[&address], + Vec::new(), + ) + }) + .1 + .extend(account_storages.remove(0)); } else { for (i, storage) in account_storages.into_iter().enumerate() { - all_account_storages[start_index + i] = storage; + let address = all_account_hashes[start_index + i]; + let root_hash = account_storage_roots.accounts_with_storage_root[&address]; + current_account_storages.insert(address, (root_hash, storage)); } } } @@ -1559,16 +1571,9 @@ impl PeerHandler { } { - let current_account_hashes = account_storage_roots - .accounts_with_storage_root - .iter() - .map(|a| *a.0) - .collect::>(); - let current_account_storages = std::mem::take(&mut all_account_storages); - - let snapshot = current_account_hashes + let snapshot = current_account_storages .into_iter() - .zip(current_account_storages) + .map(|(addr, (_, storages))| (addr, storages)) .collect::>() .encode_to_vec(); From df2811d02f73f1a1452b52b0aa23c72e91618c4c Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 24 Sep 2025 16:40:13 -0300 Subject: [PATCH 036/115] fast put batch --- crates/common/trie/db.rs | 12 +++++++++++- crates/storage/trie_db/rocksdb.rs | 18 ++++++++++++++++++ 2 files changed, 29 
insertions(+), 1 deletion(-) diff --git a/crates/common/trie/db.rs b/crates/common/trie/db.rs index 91b5e0462f9..115e33b5790 100644 --- a/crates/common/trie/db.rs +++ b/crates/common/trie/db.rs @@ -1,4 +1,6 @@ -use crate::{NodeHash, error::TrieError}; +use ethrex_rlp::encode::RLPEncode; + +use crate::{Node, NodeHash, error::TrieError}; use std::{ collections::BTreeMap, sync::{Arc, Mutex}, @@ -7,6 +9,14 @@ use std::{ pub trait TrieDB: Send + Sync { fn get(&self, key: NodeHash) -> Result>, TrieError>; fn put_batch(&self, key_values: Vec<(NodeHash, Vec)>) -> Result<(), TrieError>; + fn put_batch_no_alloc(&self, key_values: Vec) -> Result<(), TrieError> { + self.put_batch( + key_values + .into_iter() + .map(|node| (node.compute_hash(), node.encode_to_vec())) + .collect(), + ) + } fn put(&self, key: NodeHash, value: Vec) -> Result<(), TrieError> { self.put_batch(vec![(key, value)]) } diff --git a/crates/storage/trie_db/rocksdb.rs b/crates/storage/trie_db/rocksdb.rs index 3aa74bb9cb1..7402f24ec3c 100644 --- a/crates/storage/trie_db/rocksdb.rs +++ b/crates/storage/trie_db/rocksdb.rs @@ -1,4 +1,5 @@ use ethrex_common::H256; +use ethrex_rlp::encode::RLPEncode; use ethrex_trie::{NodeHash, TrieDB, error::TrieError}; use rocksdb::{DBWithThreadMode, MultiThreaded}; use std::sync::Arc; @@ -79,6 +80,23 @@ impl TrieDB for RocksDBTrieDB { .write(batch) .map_err(|e| TrieError::DbError(anyhow::anyhow!("RocksDB batch write error: {}", e))) } + + fn put_batch_no_alloc(&self, key_values: Vec) -> Result<(), TrieError> { + let cf = self.cf_handle()?; + let mut batch = rocksdb::WriteBatch::default(); + let mut buffer = vec![0_u8; 300]; + + for node in key_values { + let db_key = self.make_key(node.compute_hash()); + buffer.clear(); + node.encode(&mut buffer); + batch.put_cf(&cf, db_key, &buffer); + } + + self.db + .write(batch) + .map_err(|e| TrieError::DbError(anyhow::anyhow!("RocksDB batch write error: {}", e))) + } } #[cfg(test)] From 13d66590327b69b739fe975246e1cf757941b75c Mon Sep 17 
00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 24 Sep 2025 16:42:05 -0300 Subject: [PATCH 037/115] Update rocksdb.rs --- crates/storage/trie_db/rocksdb.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/storage/trie_db/rocksdb.rs b/crates/storage/trie_db/rocksdb.rs index 7402f24ec3c..8060aa4b20f 100644 --- a/crates/storage/trie_db/rocksdb.rs +++ b/crates/storage/trie_db/rocksdb.rs @@ -84,7 +84,7 @@ impl TrieDB for RocksDBTrieDB { fn put_batch_no_alloc(&self, key_values: Vec) -> Result<(), TrieError> { let cf = self.cf_handle()?; let mut batch = rocksdb::WriteBatch::default(); - let mut buffer = vec![0_u8; 300]; + let mut buffer = Vec::with_capacity(300); for node in key_values { let db_key = self.make_key(node.compute_hash()); From 8896db3ecaa61e346405b84c9e37a022c01e6f0f Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Wed, 24 Sep 2025 17:03:43 -0300 Subject: [PATCH 038/115] threadpool --- Cargo.lock | 9 ++++++++- crates/networking/p2p/Cargo.toml | 1 + crates/networking/p2p/sync.rs | 17 ++++++----------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 71f6af166b0..3f059b4438f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4336,6 +4336,7 @@ dependencies = [ "rand 0.8.5", "rayon", "rocksdb", + "scoped_threadpool", "secp256k1", "serde", "serde_json", @@ -10743,6 +10744,12 @@ dependencies = [ "serde_json", ] +[[package]] +name = "scoped_threadpool" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8" + [[package]] name = "scopeguard" version = "1.2.0" @@ -13173,7 +13180,7 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.3", "static_assertions", ] diff --git 
a/crates/networking/p2p/Cargo.toml b/crates/networking/p2p/Cargo.toml index 0b7999f32b6..2d1d7e61a28 100644 --- a/crates/networking/p2p/Cargo.toml +++ b/crates/networking/p2p/Cargo.toml @@ -34,6 +34,7 @@ cfg-if.workspace = true rocksdb = { workspace = true, optional = true } prometheus = "0.14.0" +scoped_threadpool = "0.1.9" tokio-stream = "0.1.17" sha3 = "0.10.8" diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index acb29b3e5e7..04c9951cd52 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1609,6 +1609,7 @@ async fn insert_storage_into_rocksdb( use ethrex_trie::trie_sorted::trie_from_sorted_accounts_wrap; use rayon::iter::IntoParallelRefIterator; use rocksdb::DBCommon; + use scoped_threadpool::Pool; struct RocksDBIterator<'a> { iter: rocksdb::DBRawIterator<'a>, @@ -1658,12 +1659,13 @@ async fn insert_storage_into_rocksdb( db.ingest_external_file(file_paths) .map_err(|err| SyncError::RocksDBError(err.into_string()))?; - std::thread::scope(|s| { - let mut joinset = Vec::new(); + // Create a threadpool holding 4 threads + let mut pool = Pool::new(16); + pool.scoped(|s| { for account_hash in accounts_with_storage { let store_clone = store.clone(); let mut iter = db.raw_iterator(); - let handle = s.spawn(move || { + s.execute(move || { let trie = store_clone .open_storage_trie(account_hash, *EMPTY_TRIE_HASH) .expect("Should be able to open trie"); @@ -1686,14 +1688,7 @@ async fn insert_storage_into_rocksdb( }) .map_err(SyncError::TrieGenerationError); METRICS.storage_tries_state_roots_computed.inc(); - result - }); - joinset.push(handle); - joinset - .extract_if(.., |handle| handle.is_finished()) - .map(|handle| handle.join()) - .collect::, _>, _>>() - .map_err(|_| SyncError::ThreadCreationError)??; //this needs to be a no new thread + }) } Ok(()) }) From 86869ec4d58c35a4e5dcfd5083d66d0e7102e622 Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Thu, 25 Sep 2025 02:00:14 -0300 Subject: [PATCH 039/115] 
perf(l1): compress storage trie downloads --- crates/networking/p2p/peer_handler.rs | 91 ++++++++++++++++----------- crates/networking/p2p/sync.rs | 15 +++-- 2 files changed, 63 insertions(+), 43 deletions(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index ac6e7d6ce86..c3a08eac06b 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -14,6 +14,7 @@ use ethrex_common::{ use ethrex_rlp::encode::RLPEncode; use ethrex_trie::Nibbles; use ethrex_trie::{Node, verify_range}; +use futures::SinkExt; use rand::seq::SliceRandom; use crate::{ @@ -1283,8 +1284,32 @@ impl PeerHandler { *METRICS.current_step.lock().await = "Requesting Storage Ranges".to_string(); debug!("Starting request_storage_ranges function"); // 1) split the range in chunks of same length + let mut accounts_by_root_hash: BTreeMap<_, Vec<_>> = BTreeMap::new(); + for (account, root_hash) in &account_storage_roots.accounts_with_storage_root { + accounts_by_root_hash + .entry(*root_hash) + .or_default() + .push(*account); + } + let mut accounts_by_root_hash = Vec::from_iter(accounts_by_root_hash); + accounts_by_root_hash.sort_unstable_by_key(|(_, accounts)| accounts.len()); let chunk_size = 300; - let chunk_count = (account_storage_roots.accounts_with_storage_root.len() / chunk_size) + 1; + let chunk_count = (accounts_by_root_hash.len() / chunk_size) + 1; + + // TODO: + // To download repeated tries only once, we can group by root_hash so + // we download one address and then store N times (for simpler insertion/healing). + // Take care of doing it inside this function to avoid confusion between + // pivots. + // At a later time we might try to also store only once and insert for all. + // That should help at least to do less `compute_storage_roots`, but skipping + // that might be problematic for healing. + // We can also sort by decreasing number of repetitions, so we download + // and settle the most common first. 
+ // AFTER: try to reduce memory usage from account filtering. + // It currently takes about 68B per account with storages, with ~25M of them, + // meaning 1.7GB. Possibly several copies of this. + // THEN: review storage formats, maybe play with memory mapped data. // list of tasks to be executed // Types are (start_index, end_index, starting_hash) @@ -1292,8 +1317,7 @@ impl PeerHandler { let mut tasks_queue_not_started = VecDeque::::new(); for i in 0..chunk_count { let chunk_start = chunk_size * i; - let chunk_end = (chunk_start + chunk_size) - .min(account_storage_roots.accounts_with_storage_root.len()); + let chunk_end = (chunk_start + chunk_size).min(accounts_by_root_hash.len()); tasks_queue_not_started.push_back(StorageTask { start_index: chunk_start, end_index: chunk_end, @@ -1302,11 +1326,11 @@ impl PeerHandler { }); } - let all_account_hashes: Vec = account_storage_roots - .accounts_with_storage_root - .iter() - .map(|(addr, _)| *addr) - .collect(); + // let all_account_hashes: Vec = account_storage_roots + // .accounts_with_storage_root + // .keys() + // .copied() + // .collect(); // channel to send the tasks to the peers let (task_sender, mut task_receiver) = tokio::sync::mpsc::channel::(1000); @@ -1320,23 +1344,23 @@ impl PeerHandler { // TODO: in a refactor, delete this replace with a structure that can handle removes let mut accounts_done: Vec = Vec::new(); - // Maps hashed address to storage root and vector of hashed storage keys and keys - let mut current_account_storages: BTreeMap)> = + // Maps storage root to vector of hashed addresses matching that root and + // vector of hashed storage keys and storage values. 
+ let mut current_account_storages: BTreeMap, Vec<(H256, U256)>)> = BTreeMap::new(); debug!("Starting request_storage_ranges loop"); loop { if current_account_storages .values() - .map(|(_, storages)| storages.len()) + .map(|(accounts, storages)| 32 * accounts.len() + 32 * storages.len()) .sum::() - * 64 > RANGE_FILE_CHUNK_SIZE { let current_account_storages = std::mem::take(&mut current_account_storages); let snapshot = current_account_storages .into_iter() - .map(|(hashed_address, (_, storages))| (hashed_address, storages)) + .map(|(_, (accounts, storages))| (accounts, storages)) .collect::>() .encode_to_vec(); @@ -1384,9 +1408,11 @@ impl PeerHandler { self.peer_table.free_peer(peer_id).await; - for account in &all_account_hashes[start_index..remaining_start] { - accounts_done.push(*account); - } + accounts_done.extend( + accounts_by_root_hash[start_index..remaining_start] + .iter() + .flat_map(|(_, accounts)| accounts.iter().copied()), + ); if remaining_start < remaining_end { debug!("Failed to download entire chunk from peer {peer_id}"); @@ -1411,10 +1437,11 @@ impl PeerHandler { }; tasks_queue_not_started.push_back(task); task_count += 1; - accounts_done.push(all_account_hashes[remaining_start]); + accounts_done + .extend(accounts_by_root_hash[remaining_start].1.iter().copied()); account_storage_roots .healed_accounts - .insert(all_account_hashes[start_index]); + .extend(accounts_by_root_hash[start_index].1.iter().copied()); } } else { if remaining_start + 1 < remaining_end { @@ -1497,24 +1524,20 @@ impl PeerHandler { "Total tasks: {task_count}, completed tasks: {completed_tasks}, queued tasks: {}", tasks_queue_not_started.len() ); + // THEN: update insert to read with the correct structure and reuse + // tries, only changing the prefix for insertion. 
if account_storages.len() == 1 { - let address = all_account_hashes[start_index]; + let (root_hash, accounts) = &accounts_by_root_hash[start_index]; // We downloaded a big storage account current_account_storages - .entry(address) - .or_insert_with(|| { - ( - account_storage_roots.accounts_with_storage_root[&address], - Vec::new(), - ) - }) + .entry(*root_hash) + .or_insert_with(|| (accounts.clone(), Vec::new())) .1 .extend(account_storages.remove(0)); } else { for (i, storage) in account_storages.into_iter().enumerate() { - let address = all_account_hashes[start_index + i]; - let root_hash = account_storage_roots.accounts_with_storage_root[&address]; - current_account_storages.insert(address, (root_hash, storage)); + let (root_hash, accounts) = &accounts_by_root_hash[start_index]; + current_account_storages.insert(*root_hash, (accounts.clone(), storage)); } } } @@ -1542,13 +1565,11 @@ impl PeerHandler { let tx = task_sender.clone(); + // FIXME: this unzip is probably pointless and takes up unnecessary memory. 
let (chunk_account_hashes, chunk_storage_roots): (Vec<_>, Vec<_>) = - account_storage_roots - .accounts_with_storage_root + accounts_by_root_hash[task.start_index..task.end_index] .iter() - .skip(task.start_index) - .take(task.end_index - task.start_index) - .map(|(hash, root)| (*hash, *root)) + .map(|(root, storages)| (storages[0], *root)) .unzip(); if task_count - completed_tasks < 30 { @@ -1573,7 +1594,7 @@ impl PeerHandler { { let snapshot = current_account_storages .into_iter() - .map(|(addr, (_, storages))| (addr, storages)) + .map(|(_, (accounts, storages))| (accounts, storages)) .collect::>() .encode_to_vec(); diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 1e038583836..df087f5537d 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1043,7 +1043,7 @@ impl Syncer { let snapshot_contents = std::fs::read(&snapshot_path) .map_err(|err| SyncError::SnapshotReadError(snapshot_path.clone(), err))?; - let account_storages_snapshot: Vec<(H256, Vec<(H256, U256)>)> = + let account_storages_snapshot: Vec<(H256, (Vec, Vec<(H256, U256)>))> = RLPDecode::decode(&snapshot_contents) .map_err(|_| SyncError::SnapshotDecodeError(snapshot_path.clone()))?; @@ -1059,14 +1059,15 @@ impl Syncer { // We need to fix this issue in request_storage_ranges and remove this filter. 
account_storages_snapshot .into_par_iter() - .filter(|(_account_hash, storage)| !storage.is_empty()) - .map(|(account_hash, key_value_pairs)| { + .filter(|(_root_hash, (_accounts, storage))| !storage.is_empty()) + .map(|(root_hash, (accounts, key_value_pairs))| { compute_storage_roots( maybe_big_account_storage_state_roots_clone.clone(), store.clone(), - account_hash, + accounts[0], key_value_pairs, pivot_hash_moved, + root_hash, ) }) .collect::, SyncError>>() @@ -1237,6 +1238,7 @@ fn compute_storage_roots( account_hash: H256, key_value_pairs: Vec<(H256, U256)>, pivot_hash: H256, + expected_root_hash: H256, ) -> Result { let account_storage_root = match maybe_big_account_storage_state_roots .lock() @@ -1259,10 +1261,7 @@ fn compute_storage_roots( let (computed_storage_root, changes) = storage_trie.collect_changes_since_last_hash(); - let account_state = store - .get_account_state_by_acc_hash(pivot_hash, account_hash)? - .ok_or(SyncError::AccountState(pivot_hash, account_hash))?; - if computed_storage_root == account_state.storage_root { + if computed_storage_root == expected_root_hash { METRICS.storage_tries_state_roots_computed.inc(); } else { maybe_big_account_storage_state_roots From 2ec688b467eaf87a0239173145c714af7a2f68ea Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Thu, 25 Sep 2025 02:25:14 -0300 Subject: [PATCH 040/115] fix order --- crates/networking/p2p/peer_handler.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index c3a08eac06b..ad3f23ac6d2 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -1292,7 +1292,7 @@ impl PeerHandler { .push(*account); } let mut accounts_by_root_hash = Vec::from_iter(accounts_by_root_hash); - accounts_by_root_hash.sort_unstable_by_key(|(_, accounts)| accounts.len()); + accounts_by_root_hash.sort_unstable_by_key(|(_, accounts)| !accounts.len()); let chunk_size = 300; let 
chunk_count = (accounts_by_root_hash.len() / chunk_size) + 1; From f0d1e0e361bd1ad19bbfa343509daadcff8b2e39 Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Thu, 25 Sep 2025 10:36:04 -0300 Subject: [PATCH 041/115] fix insertion --- crates/networking/p2p/sync.rs | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index df087f5537d..555b5b5c11b 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1043,7 +1043,7 @@ impl Syncer { let snapshot_contents = std::fs::read(&snapshot_path) .map_err(|err| SyncError::SnapshotReadError(snapshot_path.clone(), err))?; - let account_storages_snapshot: Vec<(H256, (Vec, Vec<(H256, U256)>))> = + let account_storages_snapshot: Vec<(Vec, Vec<(H256, U256)>)> = RLPDecode::decode(&snapshot_contents) .map_err(|_| SyncError::SnapshotDecodeError(snapshot_path.clone()))?; @@ -1055,19 +1055,20 @@ impl Syncer { let storage_trie_node_changes = tokio::task::spawn_blocking(move || { let store: Store = store_clone; - // TODO: Here we are filtering again the account with empty storage because we are adding empty accounts on purpose (it was the easiest thing to do) - // We need to fix this issue in request_storage_ranges and remove this filter. 
account_storages_snapshot .into_par_iter() - .filter(|(_root_hash, (_accounts, storage))| !storage.is_empty()) - .map(|(root_hash, (accounts, key_value_pairs))| { + .flat_map(|(accounts, storages)| { + accounts + .into_par_iter() + .map(move |account| (account, storages.clone())) + }) + .map(|(account, storages)| { compute_storage_roots( maybe_big_account_storage_state_roots_clone.clone(), store.clone(), - accounts[0], - key_value_pairs, + account, + storages.clone(), pivot_hash_moved, - root_hash, ) }) .collect::, SyncError>>() @@ -1238,7 +1239,6 @@ fn compute_storage_roots( account_hash: H256, key_value_pairs: Vec<(H256, U256)>, pivot_hash: H256, - expected_root_hash: H256, ) -> Result { let account_storage_root = match maybe_big_account_storage_state_roots .lock() @@ -1261,7 +1261,10 @@ fn compute_storage_roots( let (computed_storage_root, changes) = storage_trie.collect_changes_since_last_hash(); - if computed_storage_root == expected_root_hash { + let account_state = store + .get_account_state_by_acc_hash(pivot_hash, account_hash)? 
+ .ok_or(SyncError::AccountState(pivot_hash, account_hash))?; + if computed_storage_root == account_state.storage_root { METRICS.storage_tries_state_roots_computed.inc(); } else { maybe_big_account_storage_state_roots From bccc632c8adcffbfa6db4077684ca9c0b972c79f Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 25 Sep 2025 11:09:17 -0300 Subject: [PATCH 042/115] Back into par iter --- crates/common/trie/trie_sorted.rs | 38 ++++++++++++------------------- crates/networking/p2p/sync.rs | 18 ++++++--------- 2 files changed, 22 insertions(+), 34 deletions(-) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index 53988a5ac8b..a55eafa2658 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -1,12 +1,14 @@ -use std::thread::{Scope, scope}; - use crate::{ EMPTY_TRIE_HASH, Nibbles, Node, TrieDB, TrieError, node::{BranchNode, ExtensionNode, LeafNode}, }; use ethereum_types::H256; use ethrex_rlp::encode::RLPEncode; -use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; +use rayon::{ + Scope, + iter::{IntoParallelRefIterator, ParallelIterator}, + scope, +}; use tracing::debug; #[derive(Debug, Default, Clone)] @@ -126,27 +128,21 @@ fn flush_nodes_to_write( nodes_to_write: Vec, db: &dyn TrieDB, ) -> Result<(), TrieGenerationError> { - db.put_batch( - nodes_to_write - .par_iter() - .map(|node| (node.compute_hash(), node.encode_to_vec())) - .collect(), - ) - .map_err(TrieGenerationError::FlushToDbError) + db.put_batch_no_alloc(nodes_to_write) + .map_err(TrieGenerationError::FlushToDbError) } #[inline(never)] pub fn trie_from_sorted_accounts<'scope, T>( db: &'scope dyn TrieDB, data_iter: &mut T, - scoped_thread: &'scope Scope<'scope, '_>, + scope: &Scope<'scope>, ) -> Result where - T: Iterator)>, + T: Iterator)> + Send, { let mut nodes_to_write: Vec = Vec::with_capacity(SIZE_TO_WRITE_DB as usize + 65); let mut trie_stack: Vec = Vec::with_capacity(64); // Optimized for H256 - let mut 
write_threads = Vec::new(); let mut left_side = StackElement::default(); let Some(initial_value) = data_iter.next() else { @@ -168,8 +164,9 @@ where while let Some(right_side) = right_side_opt { if nodes_to_write.len() as u64 > SIZE_TO_WRITE_DB { - write_threads - .push(scoped_thread.spawn(move || flush_nodes_to_write(nodes_to_write, db))); + scope.spawn(move |_| { + let _ = flush_nodes_to_write(nodes_to_write, db); + }); nodes_to_write = Vec::with_capacity(SIZE_TO_WRITE_DB as usize + 65); } @@ -271,12 +268,7 @@ where .finalize() }; - write_threads.push(scoped_thread.spawn(move || flush_nodes_to_write(nodes_to_write, db))); - write_threads - .into_iter() - .flat_map(|thread| thread.join()) - .collect::>()?; - + flush_nodes_to_write(nodes_to_write, db)?; Ok(hash) } @@ -285,9 +277,9 @@ pub fn trie_from_sorted_accounts_wrap( accounts_iter: &mut T, ) -> Result where - T: Iterator)>, + T: Iterator)> + Send, { - scope(move |s| trie_from_sorted_accounts(db, accounts_iter, s)) + scope(|s| trie_from_sorted_accounts(db, accounts_iter, s)) } #[cfg(test)] diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 04c9951cd52..1492c421ca2 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1659,13 +1659,11 @@ async fn insert_storage_into_rocksdb( db.ingest_external_file(file_paths) .map_err(|err| SyncError::RocksDBError(err.into_string()))?; - // Create a threadpool holding 4 threads - let mut pool = Pool::new(16); - pool.scoped(|s| { - for account_hash in accounts_with_storage { - let store_clone = store.clone(); - let mut iter = db.raw_iterator(); - s.execute(move || { + accounts_with_storage.into_par_iter().for_each(|account_hash| { + let store_clone = store.clone(); + use ethrex_trie::trie_sorted::trie_from_sorted_accounts; + let mut iter = db.raw_iterator(); + let trie = store_clone .open_storage_trie(account_hash, *EMPTY_TRIE_HASH) .expect("Should be able to open trie"); @@ -1688,8 +1686,6 @@ async fn 
insert_storage_into_rocksdb( }) .map_err(SyncError::TrieGenerationError); METRICS.storage_tries_state_roots_computed.inc(); - }) - } - Ok(()) - }) + }); + Ok(()) } From f7a470272359d78365a6b848335267629ee08f7b Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Thu, 25 Sep 2025 11:20:21 -0300 Subject: [PATCH 043/115] fix typo + time roots --- crates/networking/p2p/peer_handler.rs | 3 +-- crates/networking/p2p/sync.rs | 9 +++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index ad3f23ac6d2..32cd6ef2959 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -14,7 +14,6 @@ use ethrex_common::{ use ethrex_rlp::encode::RLPEncode; use ethrex_trie::Nibbles; use ethrex_trie::{Node, verify_range}; -use futures::SinkExt; use rand::seq::SliceRandom; use crate::{ @@ -1536,7 +1535,7 @@ impl PeerHandler { .extend(account_storages.remove(0)); } else { for (i, storage) in account_storages.into_iter().enumerate() { - let (root_hash, accounts) = &accounts_by_root_hash[start_index]; + let (root_hash, accounts) = &accounts_by_root_hash[start_index + i]; current_account_storages.insert(*root_hash, (accounts.clone(), storage)); } } diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 555b5b5c11b..2a0d161855c 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1060,16 +1060,21 @@ impl Syncer { .flat_map(|(accounts, storages)| { accounts .into_par_iter() + // FIXME: we probably want to make storages an Arc .map(move |account| (account, storages.clone())) }) .map(|(account, storages)| { - compute_storage_roots( + let start = Instant::now(); + let changes = compute_storage_roots( maybe_big_account_storage_state_roots_clone.clone(), store.clone(), account, storages.clone(), pivot_hash_moved, - ) + ); + let duration = Instant::now() - start; + debug!(duration = duration.as_micros(), "Computed 
Storage Root"); + changes }) .collect::, SyncError>>() }) From 9ea9ddf2a54e0ddee389c01f48abd1c18bbded9a Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Thu, 25 Sep 2025 11:24:26 -0300 Subject: [PATCH 044/115] extra data + reuse storages --- crates/networking/p2p/sync.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 2a0d161855c..c0c19f0305f 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1058,6 +1058,7 @@ impl Syncer { account_storages_snapshot .into_par_iter() .flat_map(|(accounts, storages)| { + let storages: Arc<[_]> = storages.into(); accounts .into_par_iter() // FIXME: we probably want to make storages an Arc @@ -1065,15 +1066,20 @@ impl Syncer { }) .map(|(account, storages)| { let start = Instant::now(); + let n_keys = storages.len(); let changes = compute_storage_roots( maybe_big_account_storage_state_roots_clone.clone(), store.clone(), account, - storages.clone(), + &storages, pivot_hash_moved, ); let duration = Instant::now() - start; - debug!(duration = duration.as_micros(), "Computed Storage Root"); + debug!( + duration = duration.as_micros(), + keys = n_keys, + "Computed Storage Root" + ); changes }) .collect::, SyncError>>() @@ -1242,7 +1248,7 @@ fn compute_storage_roots( maybe_big_account_storage_state_roots: Arc>>, store: Store, account_hash: H256, - key_value_pairs: Vec<(H256, U256)>, + key_value_pairs: &[(H256, U256)], pivot_hash: H256, ) -> Result { let account_storage_root = match maybe_big_account_storage_state_roots From e66cd62de851e6089f9866f6d5b9ba16cbede9b3 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 25 Sep 2025 12:14:48 -0300 Subject: [PATCH 045/115] Make lint --- crates/common/trie/trie_sorted.rs | 11 ++--- crates/networking/p2p/peer_handler.rs | 4 +- crates/networking/p2p/sync.rs | 51 ++++++++++++-------- crates/networking/p2p/sync/code_collector.rs | 2 + 
crates/networking/p2p/utils.rs | 4 +- 5 files changed, 42 insertions(+), 30 deletions(-) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index a55eafa2658..6de6891dffd 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -3,12 +3,7 @@ use crate::{ node::{BranchNode, ExtensionNode, LeafNode}, }; use ethereum_types::H256; -use ethrex_rlp::encode::RLPEncode; -use rayon::{ - Scope, - iter::{IntoParallelRefIterator, ParallelIterator}, - scope, -}; +use rayon::{Scope, scope}; use tracing::debug; #[derive(Debug, Default, Clone)] @@ -17,6 +12,9 @@ struct StackElement { element: BranchNode, } +// The large size isn't a performance problem because we use a single instance of this +// struct +#[allow(clippy::large_enum_variant)] #[derive(Debug, Clone)] enum CenterSideElement { Branch { node: BranchNode }, @@ -285,6 +283,7 @@ where #[cfg(test)] mod test { use ethereum_types::U256; + use ethrex_rlp::encode::RLPEncode; use crate::Trie; diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index e83c56ee5c2..d6b23bfa3c3 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -36,9 +36,9 @@ use crate::{ }, }, snap::encodable_to_proof, - sync::{AccountStorageRoots, BlockSyncState, SyncError, block_is_stale, update_pivot}, + sync::{AccountStorageRoots, BlockSyncState, block_is_stale, update_pivot}, utils::{ - SendMessageError, dump_accounts_to_file, dump_storages_to_file, dump_to_file, + SendMessageError, dump_accounts_to_file, dump_storages_to_file, get_account_state_snapshot_file, get_account_storages_snapshot_file, }, }; diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 1492c421ca2..0ffd00055e6 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -9,7 +9,7 @@ use crate::sync::state_healing::heal_state_trie_wrap; use crate::sync::storage_healing::heal_storage_trie; use 
crate::utils::{ current_unix_time, get_account_state_snapshots_dir, get_account_storages_snapshots_dir, - get_code_hashes_snapshots_dir, get_rocksdb_temp_accounts_dir, get_rocksdb_temp_storage_dir, + get_code_hashes_snapshots_dir, }; use crate::{ metrics::METRICS, @@ -24,17 +24,21 @@ use ethrex_common::{ use ethrex_rlp::{decode::RLPDecode, encode::RLPEncode, error::RLPDecodeError}; use ethrex_storage::{EngineType, STATE_TRIE_SEGMENTS, Store, error::StoreError}; use ethrex_trie::trie_sorted::TrieGenerationError; -use ethrex_trie::{NodeHash, Trie, TrieError}; +use ethrex_trie::{Trie, TrieError}; use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator}; -use std::collections::{BTreeMap, BTreeSet, HashSet}; +#[cfg(not(feature = "rocksdb"))] +use std::collections::hash_map::Entry; +use std::collections::{BTreeMap, HashSet}; use std::path::{Path, PathBuf}; +#[cfg(not(feature = "rocksdb"))] +use std::sync::Mutex; use std::time::SystemTime; use std::{ array, cmp::min, - collections::{HashMap, hash_map::Entry}, + collections::HashMap, sync::{ - Arc, Mutex, + Arc, atomic::{AtomicBool, Ordering}, }, }; @@ -885,10 +889,10 @@ impl Syncer { store.clone(), &mut storage_accounts, &account_state_snapshots_dir, - &get_rocksdb_temp_accounts_dir(&self.datadir), + &crate::utils::get_rocksdb_temp_accounts_dir(&self.datadir), &mut code_hash_collector, ).await?; - let accounts_with_storage: BTreeSet = BTreeSet::from_iter(storage_accounts.accounts_with_storage_root.keys().into_iter().map(|k| *k)); + let accounts_with_storage = std::collections::BTreeSet::from_iter(storage_accounts.accounts_with_storage_root.keys().copied()); } else { let computed_state_root = insert_accounts_into_db( store.clone(), @@ -989,7 +993,7 @@ impl Syncer { store.clone(), accounts_with_storage, &account_storages_snapshots_dir, - &get_rocksdb_temp_storage_dir(&self.datadir) + &crate::utils::get_rocksdb_temp_storage_dir(&self.datadir) ).await?; } else { let maybe_big_account_storage_state_roots: 
Arc>> = @@ -1155,8 +1159,10 @@ impl Syncer { } } -type StorageRoots = (H256, Vec<(NodeHash, Vec)>); +#[cfg(not(feature = "rocksdb"))] +type StorageRoots = (H256, Vec<(ethrex_trie::NodeHash, Vec)>); +#[cfg(not(feature = "rocksdb"))] fn compute_storage_roots( maybe_big_account_storage_state_roots: Arc>>, store: Store, @@ -1434,6 +1440,7 @@ pub async fn validate_bytecodes(store: Store, state_root: H256) -> bool { is_valid } +#[cfg(not(feature = "rocksdb"))] async fn insert_accounts_into_db( store: Store, storage_accounts: &mut AccountStorageRoots, @@ -1493,6 +1500,7 @@ async fn insert_accounts_into_db( Ok(computed_state_root) } +#[cfg(not(feature = "rocksdb"))] async fn insert_storages_into_db( store: Store, account_storages_snapshots_dir: &Path, @@ -1574,6 +1582,16 @@ async fn insert_accounts_into_rocksdb( .collect(); db.ingest_external_file(file_paths) .map_err(|err| SyncError::RocksDBError(err.into_string()))?; + let iter = db.full_iterator(rocksdb::IteratorMode::Start); + for account in iter { + let account = account.map_err(|err| SyncError::RocksDBError(err.into_string()))?; + let account_state = AccountState::decode(&account.1).map_err(SyncError::Rlp)?; + if account_state.code_hash != *EMPTY_KECCACK_HASH { + code_hash_collector.add(account_state.code_hash); + code_hash_collector.flush_if_needed().await?; + } + } + let iter = db.full_iterator(rocksdb::IteratorMode::Start); trie_from_sorted_accounts_wrap( trie.db(), @@ -1589,10 +1607,6 @@ async fn insert_accounts_into_rocksdb( .accounts_with_storage_root .insert(H256::from_slice(k), account_state.storage_root); } - if account_state.code_hash != *EMPTY_KECCACK_HASH { - code_hash_collector.add(account_state.code_hash); - code_hash_collector.flush_if_needed(); - } }) .map(|(k, v)| (H256::from_slice(&k), v.to_vec())), ) @@ -1602,14 +1616,11 @@ async fn insert_accounts_into_rocksdb( #[cfg(feature = "rocksdb")] async fn insert_storage_into_rocksdb( store: Store, - accounts_with_storage: BTreeSet, + 
accounts_with_storage: std::collections::BTreeSet, account_state_snapshots_dir: &Path, temp_db_dir: &Path, ) -> Result<(), SyncError> { use ethrex_trie::trie_sorted::trie_from_sorted_accounts_wrap; - use rayon::iter::IntoParallelRefIterator; - use rocksdb::DBCommon; - use scoped_threadpool::Pool; struct RocksDBIterator<'a> { iter: rocksdb::DBRawIterator<'a>, @@ -1659,9 +1670,8 @@ async fn insert_storage_into_rocksdb( db.ingest_external_file(file_paths) .map_err(|err| SyncError::RocksDBError(err.into_string()))?; - accounts_with_storage.into_par_iter().for_each(|account_hash| { + accounts_with_storage.into_par_iter().map(|account_hash| { let store_clone = store.clone(); - use ethrex_trie::trie_sorted::trie_from_sorted_accounts; let mut iter = db.raw_iterator(); let trie = store_clone @@ -1686,6 +1696,7 @@ async fn insert_storage_into_rocksdb( }) .map_err(SyncError::TrieGenerationError); METRICS.storage_tries_state_roots_computed.inc(); - }); + result + }).collect::, _>>()?; Ok(()) } diff --git a/crates/networking/p2p/sync/code_collector.rs b/crates/networking/p2p/sync/code_collector.rs index 7cd3873fc94..f5ccaf81889 100644 --- a/crates/networking/p2p/sync/code_collector.rs +++ b/crates/networking/p2p/sync/code_collector.rs @@ -39,6 +39,8 @@ impl CodeHashCollector { self.buffer.insert(hash); } + // Used depending on if the feature flag rocksdb is used + #[allow(dead_code)] /// Extends the buffer with a list of code hashes pub fn extend(&mut self, hashes: impl IntoIterator) { self.buffer.extend(hashes); diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index 48fcc34befa..b46de9369b8 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -127,7 +127,7 @@ pub fn dump_accounts_to_file( cfg_if::cfg_if! 
{ if #[cfg(feature = "rocksdb")] { dump_to_rocks_db( - path.clone(), + path, accounts .into_iter() .map(|(hash, state)| (hash.0.to_vec(), state.encode_to_vec()) @@ -152,7 +152,7 @@ pub fn dump_storages_to_file( cfg_if::cfg_if! { if #[cfg(feature = "rocksdb")] { dump_to_rocks_db( - &path, + path, storages .into_iter() .flat_map(|(hash, slots)| { From 37c1d794a4a3dc3c6dec3a74e87853313a0ed7f2 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 25 Sep 2025 12:22:39 -0300 Subject: [PATCH 046/115] Update db.rs --- crates/common/trie/db.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/common/trie/db.rs b/crates/common/trie/db.rs index 7941fde01a0..d32c8708859 100644 --- a/crates/common/trie/db.rs +++ b/crates/common/trie/db.rs @@ -1,6 +1,7 @@ use ethereum_types::H256; +use ethrex_rlp::encode::RLPEncode; -use crate::{NodeHash, NodeRLP, Trie, error::TrieError}; +use crate::{Node, NodeHash, NodeRLP, Trie, error::TrieError}; use std::{ collections::BTreeMap, sync::{Arc, Mutex}, From 6c8e0fb093b9d04bf9acf879d5af4c42f4b79b8f Mon Sep 17 00:00:00 2001 From: Pablo Deymonnaz Date: Thu, 25 Sep 2025 16:08:58 -0300 Subject: [PATCH 047/115] WIP: scoped thread pool --- pool_test/Cargo.lock | 7 ++++++ pool_test/Cargo.toml | 6 +++++ pool_test/src/main.rs | 9 +++++++ pool_test/src/myscope.rs | 52 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 74 insertions(+) create mode 100644 pool_test/Cargo.lock create mode 100644 pool_test/Cargo.toml create mode 100644 pool_test/src/main.rs create mode 100644 pool_test/src/myscope.rs diff --git a/pool_test/Cargo.lock b/pool_test/Cargo.lock new file mode 100644 index 00000000000..8658b491916 --- /dev/null +++ b/pool_test/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "pool_test" +version = "0.1.0" diff --git a/pool_test/Cargo.toml b/pool_test/Cargo.toml new file mode 100644 index 00000000000..62b3f1f286d --- /dev/null +++ b/pool_test/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "pool_test" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/pool_test/src/main.rs b/pool_test/src/main.rs new file mode 100644 index 00000000000..6d1bf5686e3 --- /dev/null +++ b/pool_test/src/main.rs @@ -0,0 +1,9 @@ + +pub mod myscope; + + + +fn main() { + //let pool = ThreadPool::new(4); + println!("Hello, world!"); +} diff --git a/pool_test/src/myscope.rs b/pool_test/src/myscope.rs new file mode 100644 index 00000000000..e62646db9a2 --- /dev/null +++ b/pool_test/src/myscope.rs @@ -0,0 +1,52 @@ +use std::thread; + +pub struct ThreadPool<'scope, 'env> { + s: &'scope thread::Scope<'scope, 'env>, +} + +impl<'scope, 'env> ThreadPool<'scope, 'env> { + pub fn new(thread_count: usize) -> Self { + thread::scope(|s| { + for _ in 0..thread_count { + s.spawn(|| { + // Thread work goes here + }); + } + }); + + ThreadPool { + s: , + } + } +} + +fn execute<'a>(s: &'a thread::Scope<'a, '_>, f: impl FnOnce() + std::marker::Send + 'a) { + s.spawn(|| f); +} + +fn run_scoped_threads() { + let mut a = vec![1, 2, 3]; + let mut x = 0; + + let f = || { + println!("hello from the first scoped thread"); + // We can borrow `a` here. + dbg!(&a); + }; + thread::scope(|s| { + s.spawn(f); + s.spawn(|| { + println!("hello from the second scoped thread"); + // We can even mutably borrow `x` here, + // because no other threads are using it. 
+ x += a[0] + a[2]; + }); + println!("hello from the main thread"); + }); + + // After the scope, we can modify and access our variables again: + a.push(4); + assert_eq!(x, a.len()); +} + +//std::thread::scope From 2ebcafa2afb5bd51a8290e121d2654a65f86adb0 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 25 Sep 2025 17:11:16 -0300 Subject: [PATCH 048/115] Finished scoped threadpool --- pool_test/Cargo.toml | 1 + pool_test/src/main.rs | 24 ++++++++++++-- pool_test/src/myscope.rs | 70 ++++++++++++++++------------------------ 3 files changed, 50 insertions(+), 45 deletions(-) diff --git a/pool_test/Cargo.toml b/pool_test/Cargo.toml index 62b3f1f286d..39c1cfcb0f8 100644 --- a/pool_test/Cargo.toml +++ b/pool_test/Cargo.toml @@ -2,5 +2,6 @@ name = "pool_test" version = "0.1.0" edition = "2024" +[workspace] [dependencies] diff --git a/pool_test/src/main.rs b/pool_test/src/main.rs index 6d1bf5686e3..339a8a8de6e 100644 --- a/pool_test/src/main.rs +++ b/pool_test/src/main.rs @@ -1,9 +1,27 @@ +use std::{sync::Arc, thread::scope}; -pub mod myscope; - +use crate::myscope::ThreadPool; +pub mod myscope; fn main() { - //let pool = ThreadPool::new(4); + println!("Start"); + scope(|s| { + let pool = ThreadPool::new(4, s); + let pool_arc = Arc::new(pool); + let pool_arc_2 = pool_arc.clone(); + pool_arc.execute(Box::new(move || { + let x = 1; + println!("Inside, Inside, world!"); + pool_arc_2.execute(Box::new(move || { + let x = 3; + println!("3, world!"); + })); + })); + pool_arc.execute(Box::new(move || { + let x = 2; + println!("Inside, world!"); + })); + }); println!("Hello, world!"); } diff --git a/pool_test/src/myscope.rs b/pool_test/src/myscope.rs index e62646db9a2..9ab9dcec682 100644 --- a/pool_test/src/myscope.rs +++ b/pool_test/src/myscope.rs @@ -1,52 +1,38 @@ -use std::thread; - -pub struct ThreadPool<'scope, 'env> { - s: &'scope thread::Scope<'scope, 'env>, +use std::marker::Send; +use std::sync::mpsc::{channel, Receiver, Sender}; +use std::sync::{Arc, 
Mutex}; +use std::thread::Scope; + +pub struct ThreadPool<'scope> { + task_queue_sender: Sender>, // Implictly our threads in the thread pool have the receiver + _phantom_data: std::marker::PhantomData<&'scope ()>, } -impl<'scope, 'env> ThreadPool<'scope, 'env> { - pub fn new(thread_count: usize) -> Self { - thread::scope(|s| { - for _ in 0..thread_count { - s.spawn(|| { - // Thread work goes here - }); +impl<'scope> ThreadPool<'scope> { + pub fn new(thread_count: usize, scope: &'scope Scope<'scope, '_>) -> Self { + let (task_queue_sender, receiver) = channel::>(); + let task_queue_rx = Arc::new(Mutex::new(receiver)); + + for _ in 0..thread_count { + let task_queue_rx_clone = task_queue_rx.clone(); + scope.spawn(move || { + // Thread work goes here + while let Ok(task) = { + let rx = task_queue_rx_clone.lock().unwrap(); + rx.recv() + } { + task(); } }); + } ThreadPool { - s: , + task_queue_sender, + _phantom_data: std::marker::PhantomData, } } -} - -fn execute<'a>(s: &'a thread::Scope<'a, '_>, f: impl FnOnce() + std::marker::Send + 'a) { - s.spawn(|| f); -} -fn run_scoped_threads() { - let mut a = vec![1, 2, 3]; - let mut x = 0; - - let f = || { - println!("hello from the first scoped thread"); - // We can borrow `a` here. - dbg!(&a); - }; - thread::scope(|s| { - s.spawn(f); - s.spawn(|| { - println!("hello from the second scoped thread"); - // We can even mutably borrow `x` here, - // because no other threads are using it. 
- x += a[0] + a[2]; - }); - println!("hello from the main thread"); - }); - - // After the scope, we can modify and access our variables again: - a.push(4); - assert_eq!(x, a.len()); + pub fn execute(&self, task: Box) { + self.task_queue_sender.send(task).unwrap(); + } } - -//std::thread::scope From d6509229c25262449b5e412c70314e4d2ce5d2b7 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 25 Sep 2025 17:32:05 -0300 Subject: [PATCH 049/115] Adopted thread pool --- Cargo.lock | 13 +++++------ Cargo.toml | 2 ++ crates/common/trie/Cargo.toml | 1 + crates/common/trie/trie_sorted.rs | 14 ++++++++---- crates/networking/p2p/Cargo.toml | 2 +- crates/networking/p2p/sync.rs | 38 ++++++++++++++++++++++--------- pool_test/Cargo.toml | 4 +++- 7 files changed, 49 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e87e54d73d0..a1eab7f7d95 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4320,11 +4320,11 @@ dependencies = [ "hex-literal", "hmac", "lazy_static", + "pool_test", "prometheus 0.14.0", "rand 0.8.5", "rayon", "rocksdb", - "scoped_threadpool", "secp256k1", "serde", "serde_json", @@ -4585,6 +4585,7 @@ dependencies = [ "hex-literal", "lazy_static", "libmdbx", + "pool_test", "proptest", "rand 0.8.5", "rayon", @@ -8596,6 +8597,10 @@ dependencies = [ "miniz_oxide 0.8.9", ] +[[package]] +name = "pool_test" +version = "0.1.0" + [[package]] name = "portable-atomic" version = "1.11.1" @@ -10718,12 +10723,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "scoped_threadpool" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8" - [[package]] name = "scopeguard" version = "1.2.0" diff --git a/Cargo.toml b/Cargo.toml index 2d48f36071b..710e24ac12d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ members = [ "tooling/archive_sync", "tooling/replayer", "crates/common/config", + "pool_test", ] resolver = "2" @@ 
-75,6 +76,7 @@ ethrex-prover = { path = "./crates/l2/prover" } ethrex-storage-rollup = { path = "./crates/l2/storage" } ethrex = { path = "./cmd/ethrex" } ethrex-l2-rpc = { path = "./crates/l2/networking/rpc" } +pool_test = { path = "./pool_test" } tracing = { version = "0.1", features = ["log"] } tracing-subscriber = { version = "0.3.0", features = ["env-filter"] } diff --git a/crates/common/trie/Cargo.toml b/crates/common/trie/Cargo.toml index 103bab62c3d..4b4e0b4f2fc 100644 --- a/crates/common/trie/Cargo.toml +++ b/crates/common/trie/Cargo.toml @@ -23,6 +23,7 @@ smallvec = { version = "1.10.0", features = ["const_generics", "union"] } digest = "0.10.6" lazy_static.workspace = true rayon.workspace = true +pool_test.workspace = true [features] default = [] diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index 6de6891dffd..1fbc746f83a 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -3,7 +3,8 @@ use crate::{ node::{BranchNode, ExtensionNode, LeafNode}, }; use ethereum_types::H256; -use rayon::{Scope, scope}; +use pool_test::ThreadPool; +use std::{sync::Arc, thread::scope}; use tracing::debug; #[derive(Debug, Default, Clone)] @@ -134,7 +135,7 @@ fn flush_nodes_to_write( pub fn trie_from_sorted_accounts<'scope, T>( db: &'scope dyn TrieDB, data_iter: &mut T, - scope: &Scope<'scope>, + scope: Arc>, ) -> Result where T: Iterator)> + Send, @@ -162,9 +163,9 @@ where while let Some(right_side) = right_side_opt { if nodes_to_write.len() as u64 > SIZE_TO_WRITE_DB { - scope.spawn(move |_| { + scope.execute(Box::new(move || { let _ = flush_nodes_to_write(nodes_to_write, db); - }); + })); nodes_to_write = Vec::with_capacity(SIZE_TO_WRITE_DB as usize + 65); } @@ -277,7 +278,10 @@ pub fn trie_from_sorted_accounts_wrap( where T: Iterator)> + Send, { - scope(|s| trie_from_sorted_accounts(db, accounts_iter, s)) + scope(|s| { + let pool = ThreadPool::new(12, s); + trie_from_sorted_accounts(db, accounts_iter, 
Arc::new(pool)) + }) } #[cfg(test)] diff --git a/crates/networking/p2p/Cargo.toml b/crates/networking/p2p/Cargo.toml index 3fa82c64b93..432c70b7bd1 100644 --- a/crates/networking/p2p/Cargo.toml +++ b/crates/networking/p2p/Cargo.toml @@ -35,7 +35,7 @@ cfg-if.workspace = true rocksdb = { workspace = true, optional = true } prometheus = "0.14.0" -scoped_threadpool = "0.1.9" +pool_test.workspace = true tokio-stream = "0.1.17" sha3 = "0.10.8" diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 2fb12d8a4a5..402900cd492 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1624,7 +1624,9 @@ async fn insert_storage_into_rocksdb( account_state_snapshots_dir: &Path, temp_db_dir: &Path, ) -> Result<(), SyncError> { - use ethrex_trie::trie_sorted::trie_from_sorted_accounts_wrap; + use ethrex_trie::trie_sorted::trie_from_sorted_accounts; + use pool_test::ThreadPool; + use std::thread::scope; struct RocksDBIterator<'a> { iter: rocksdb::DBRawIterator<'a>, @@ -1674,24 +1676,36 @@ async fn insert_storage_into_rocksdb( db.ingest_external_file(file_paths) .map_err(|err| SyncError::RocksDBError(err.into_string()))?; - accounts_with_storage.into_par_iter().map(|account_hash| { - let store_clone = store.clone(); - let mut iter = db.raw_iterator(); - - let trie = store_clone + let account_with_storage_and_tries = accounts_with_storage + .into_iter() + .map(|account_hash| { + ( + account_hash, + store .open_storage_trie(account_hash, *EMPTY_TRIE_HASH) - .expect("Should be able to open trie"); + .expect("Should be able to open trie"), + ) + }) + .collect::>(); + + scope(|scope| { + let pool = Arc::new(ThreadPool::new(16, scope)); + for (account_hash, trie) in account_with_storage_and_tries.iter() { + let pool_clone = pool.clone(); + let mut iter = db.raw_iterator(); + let task = Box::new(move || { let mut initial_key = account_hash.as_bytes().to_vec(); initial_key.extend([0_u8; 32]); iter.seek(initial_key); let mut iter = 
RocksDBIterator { iter, - limit: account_hash, + limit: *account_hash, }; - let result = trie_from_sorted_accounts_wrap( + let result = trie_from_sorted_accounts( trie.db(), &mut iter, + pool_clone ) .inspect_err(|err: &TrieGenerationError| { error!( @@ -1700,7 +1714,9 @@ async fn insert_storage_into_rocksdb( }) .map_err(SyncError::TrieGenerationError); METRICS.storage_tries_state_roots_computed.inc(); - result - }).collect::, _>>()?; + result; + }); + } + }); Ok(()) } diff --git a/pool_test/Cargo.toml b/pool_test/Cargo.toml index 39c1cfcb0f8..1061e8fc9ad 100644 --- a/pool_test/Cargo.toml +++ b/pool_test/Cargo.toml @@ -2,6 +2,8 @@ name = "pool_test" version = "0.1.0" edition = "2024" -[workspace] + +[lib] +path = "src/myscope.rs" [dependencies] From 67181485a100ac8a7faf72037c71b0c24e7f9a43 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 25 Sep 2025 17:45:19 -0300 Subject: [PATCH 050/115] Pool arc --- crates/networking/p2p/sync.rs | 5 +++-- pool_test/src/myscope.rs | 8 ++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 402900cd492..60f99dd492a 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1689,9 +1689,10 @@ async fn insert_storage_into_rocksdb( .collect::>(); scope(|scope| { - let pool = Arc::new(ThreadPool::new(16, scope)); + let pool = ThreadPool::new(16, scope); + let pool_arc = Arc::new(pool); for (account_hash, trie) in account_with_storage_and_tries.iter() { - let pool_clone = pool.clone(); + let pool_clone = pool_arc.clone(); let mut iter = db.raw_iterator(); let task = Box::new(move || { let mut initial_key = account_hash.as_bytes().to_vec(); diff --git a/pool_test/src/myscope.rs b/pool_test/src/myscope.rs index 9ab9dcec682..f0be39b1fdd 100644 --- a/pool_test/src/myscope.rs +++ b/pool_test/src/myscope.rs @@ -1,11 +1,10 @@ use std::marker::Send; -use std::sync::mpsc::{channel, Receiver, Sender}; +use 
std::sync::mpsc::{Receiver, Sender, channel}; use std::sync::{Arc, Mutex}; use std::thread::Scope; pub struct ThreadPool<'scope> { task_queue_sender: Sender>, // Implictly our threads in the thread pool have the receiver - _phantom_data: std::marker::PhantomData<&'scope ()>, } impl<'scope> ThreadPool<'scope> { @@ -26,10 +25,7 @@ impl<'scope> ThreadPool<'scope> { }); } - ThreadPool { - task_queue_sender, - _phantom_data: std::marker::PhantomData, - } + ThreadPool { task_queue_sender } } pub fn execute(&self, task: Box) { From c3015299cd067975d2755efc64601c8161ccbb0c Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 25 Sep 2025 17:47:13 -0300 Subject: [PATCH 051/115] Update sync.rs --- crates/networking/p2p/sync.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 60f99dd492a..14cd3ff8a87 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1689,10 +1689,9 @@ async fn insert_storage_into_rocksdb( .collect::>(); scope(|scope| { - let pool = ThreadPool::new(16, scope); - let pool_arc = Arc::new(pool); + let pool = Arc::new(ThreadPool::new(16, scope)); for (account_hash, trie) in account_with_storage_and_tries.iter() { - let pool_clone = pool_arc.clone(); + let pool_clone = pool.clone(); let mut iter = db.raw_iterator(); let task = Box::new(move || { let mut initial_key = account_hash.as_bytes().to_vec(); @@ -1717,6 +1716,7 @@ async fn insert_storage_into_rocksdb( METRICS.storage_tries_state_roots_computed.inc(); result; }); + pool.execute(task); } }); Ok(()) From 8b5e93474b76007799d7e145b2187677a8dd3e16 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 25 Sep 2025 18:07:28 -0300 Subject: [PATCH 052/115] naming threads --- pool_test/src/myscope.rs | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pool_test/src/myscope.rs b/pool_test/src/myscope.rs index f0be39b1fdd..0c6909c10fe 
100644 --- a/pool_test/src/myscope.rs +++ b/pool_test/src/myscope.rs @@ -1,7 +1,7 @@ use std::marker::Send; use std::sync::mpsc::{Receiver, Sender, channel}; use std::sync::{Arc, Mutex}; -use std::thread::Scope; +use std::thread::{Builder, Scope}; pub struct ThreadPool<'scope> { task_queue_sender: Sender>, // Implictly our threads in the thread pool have the receiver @@ -12,17 +12,19 @@ impl<'scope> ThreadPool<'scope> { let (task_queue_sender, receiver) = channel::>(); let task_queue_rx = Arc::new(Mutex::new(receiver)); - for _ in 0..thread_count { + for i in 0..thread_count { let task_queue_rx_clone = task_queue_rx.clone(); - scope.spawn(move || { - // Thread work goes here - while let Ok(task) = { - let rx = task_queue_rx_clone.lock().unwrap(); - rx.recv() - } { - task(); - } - }); + let _ = Builder::new() + .name(format!("ThreadPool {i}")) + .spawn_scoped(scope, move || { + // Thread work goes here + while let Ok(task) = { + let rx = task_queue_rx_clone.lock().unwrap(); + rx.recv() + } { + task(); + } + }); } ThreadPool { task_queue_sender } From aa6fdbefd6f49a72453e7f9ed8f6dfe08b18489c Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 25 Sep 2025 18:25:47 -0300 Subject: [PATCH 053/115] Update sync.rs --- crates/networking/p2p/sync.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 14cd3ff8a87..d3b368c36aa 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1689,7 +1689,14 @@ async fn insert_storage_into_rocksdb( .collect::>(); scope(|scope| { - let pool = Arc::new(ThreadPool::new(16, scope)); + use std::num::NonZero; + + let pool = Arc::new(ThreadPool::new( + std::thread::available_parallelism() + .map(|num| num.into()) + .unwrap_or(8), + scope, + )); for (account_hash, trie) in account_with_storage_and_tries.iter() { let pool_clone = pool.clone(); let mut iter = db.raw_iterator(); From 
53b816355154e5a446867113662296b0d7e07c53 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 25 Sep 2025 18:27:24 -0300 Subject: [PATCH 054/115] Executed separatedly last task --- crates/common/trie/trie_sorted.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index 1fbc746f83a..c61dd0b43a4 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -267,7 +267,9 @@ where .finalize() }; - flush_nodes_to_write(nodes_to_write, db)?; + scope.execute(Box::new(move || { + let _ = flush_nodes_to_write(nodes_to_write, db); + })); Ok(hash) } From 8e704cca4fa369bb51828a676143461d9f66ad72 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Fri, 26 Sep 2025 12:50:54 -0300 Subject: [PATCH 055/115] Added crossbeam --- Cargo.lock | 18 +++++++++++++++++- pool_test/Cargo.toml | 1 + pool_test/src/myscope.rs | 12 ++++-------- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a1eab7f7d95..67c70e47da9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2471,6 +2471,19 @@ dependencies = [ "crossbeam-utils 0.7.2", ] +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel 0.5.15", + "crossbeam-deque 0.8.6", + "crossbeam-epoch 0.9.18", + "crossbeam-queue 0.3.12", + "crossbeam-utils 0.8.21", +] + [[package]] name = "crossbeam-channel" version = "0.4.4" @@ -8600,6 +8613,9 @@ dependencies = [ [[package]] name = "pool_test" version = "0.1.0" +dependencies = [ + "crossbeam 0.8.4", +] [[package]] name = "portable-atomic" @@ -11854,7 +11870,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76347472cc448d47dbf9f67541fde19dbb054793e8e0546ce8917bfb695e1b56" dependencies 
= [ - "crossbeam", + "crossbeam 0.7.3", "tokio", "tokio-stream", "tokio-util", diff --git a/pool_test/Cargo.toml b/pool_test/Cargo.toml index 1061e8fc9ad..35cf24655b8 100644 --- a/pool_test/Cargo.toml +++ b/pool_test/Cargo.toml @@ -7,3 +7,4 @@ edition = "2024" path = "src/myscope.rs" [dependencies] +crossbeam = "0.8.4" diff --git a/pool_test/src/myscope.rs b/pool_test/src/myscope.rs index 0c6909c10fe..ab23b5c1b25 100644 --- a/pool_test/src/myscope.rs +++ b/pool_test/src/myscope.rs @@ -1,5 +1,5 @@ +use crossbeam::channel::{Receiver, Sender, unbounded}; use std::marker::Send; -use std::sync::mpsc::{Receiver, Sender, channel}; use std::sync::{Arc, Mutex}; use std::thread::{Builder, Scope}; @@ -9,19 +9,15 @@ pub struct ThreadPool<'scope> { impl<'scope> ThreadPool<'scope> { pub fn new(thread_count: usize, scope: &'scope Scope<'scope, '_>) -> Self { - let (task_queue_sender, receiver) = channel::>(); - let task_queue_rx = Arc::new(Mutex::new(receiver)); + let (task_queue_sender, receiver) = unbounded::>(); for i in 0..thread_count { - let task_queue_rx_clone = task_queue_rx.clone(); + let task_queue_rx_clone = receiver.clone(); let _ = Builder::new() .name(format!("ThreadPool {i}")) .spawn_scoped(scope, move || { // Thread work goes here - while let Ok(task) = { - let rx = task_queue_rx_clone.lock().unwrap(); - rx.recv() - } { + while let Ok(task) = task_queue_rx_clone.recv() { task(); } }); From ab9df8b63995e54522a874daf47810eee42af3b1 Mon Sep 17 00:00:00 2001 From: Javier Chatruc Date: Fri, 26 Sep 2025 12:54:14 -0300 Subject: [PATCH 056/115] Merged the snap sync optimizations that improve storage leaves download time and reduce healing time to a minimum --- crates/networking/p2p/peer_handler.rs | 213 +++++++++++++++--- crates/networking/p2p/sync.rs | 6 +- crates/networking/p2p/sync/state_healing.rs | 7 +- crates/networking/p2p/sync/storage_healing.rs | 20 -- 4 files changed, 185 insertions(+), 61 deletions(-) diff --git a/crates/networking/p2p/peer_handler.rs 
b/crates/networking/p2p/peer_handler.rs index 32cd6ef2959..5b29eb50e6c 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -1,5 +1,5 @@ use std::{ - collections::{BTreeMap, HashSet, VecDeque}, + collections::{BTreeMap, HashMap, HashSet, VecDeque}, io::ErrorKind, path::{Path, PathBuf}, sync::atomic::Ordering, @@ -12,6 +12,7 @@ use ethrex_common::{ types::{AccountState, BlockBody, BlockHeader, Receipt, validate_block_body}, }; use ethrex_rlp::encode::RLPEncode; +use ethrex_storage::Store; use ethrex_trie::Nibbles; use ethrex_trie::{Node, verify_range}; use rand::seq::SliceRandom; @@ -1279,16 +1280,32 @@ impl PeerHandler { account_storages_snapshots_dir: &Path, mut chunk_index: u64, pivot_header: &mut BlockHeader, + store: Store, ) -> Result { *METRICS.current_step.lock().await = "Requesting Storage Ranges".to_string(); debug!("Starting request_storage_ranges function"); // 1) split the range in chunks of same length let mut accounts_by_root_hash: BTreeMap<_, Vec<_>> = BTreeMap::new(); - for (account, root_hash) in &account_storage_roots.accounts_with_storage_root { - accounts_by_root_hash - .entry(*root_hash) - .or_default() - .push(*account); + for (account, (maybe_root_hash, _)) in &account_storage_roots.accounts_with_storage_root { + match maybe_root_hash { + Some(root) => { + accounts_by_root_hash + .entry(*root) + .or_default() + .push(*account); + } + None => { + let root = store + .get_account_state_by_acc_hash(pivot_header.hash(), *account) + .expect("Failed to get account in state trie") + .expect("Could not find account that should have been downloaded or healed") + .storage_root; + accounts_by_root_hash + .entry(root) + .or_default() + .push(*account); + } + } } let mut accounts_by_root_hash = Vec::from_iter(accounts_by_root_hash); accounts_by_root_hash.sort_unstable_by_key(|(_, accounts)| !accounts.len()); @@ -1313,6 +1330,7 @@ impl PeerHandler { // list of tasks to be executed // Types are (start_index, 
end_index, starting_hash) // NOTE: end_index is NOT inclusive + let mut tasks_queue_not_started = VecDeque::::new(); for i in 0..chunk_count { let chunk_start = chunk_size * i; @@ -1342,7 +1360,7 @@ impl PeerHandler { let mut completed_tasks = 0; // TODO: in a refactor, delete this replace with a structure that can handle removes - let mut accounts_done: Vec = Vec::new(); + let mut accounts_done: HashMap> = HashMap::new(); // Maps storage root to vector of hashed addresses matching that root and // vector of hashed storage keys and storage values. let mut current_account_storages: BTreeMap, Vec<(H256, U256)>)> = @@ -1407,11 +1425,13 @@ impl PeerHandler { self.peer_table.free_peer(peer_id).await; - accounts_done.extend( - accounts_by_root_hash[start_index..remaining_start] - .iter() - .flat_map(|(_, accounts)| accounts.iter().copied()), - ); + for (_, accounts) in accounts_by_root_hash[start_index..remaining_start].iter() { + for account in accounts { + if !accounts_done.contains_key(account) { + accounts_done.insert(*account, vec![]); + } + } + } if remaining_start < remaining_end { debug!("Failed to download entire chunk from peer {peer_id}"); @@ -1436,11 +1456,57 @@ impl PeerHandler { }; tasks_queue_not_started.push_back(task); task_count += 1; - accounts_done - .extend(accounts_by_root_hash[remaining_start].1.iter().copied()); + + let acc_hash = accounts_by_root_hash[remaining_start].1[0]; + let (_, old_intervals) = account_storage_roots + .accounts_with_storage_root + .get_mut(&acc_hash).ok_or(PeerHandlerError::UnrecoverableError("Tried to get the old download intervals for an account but did not find them".to_owned()))?; + for (old_start, end) in old_intervals { + if end == &hash_end { + *old_start = hash_start; + } + } account_storage_roots .healed_accounts .extend(accounts_by_root_hash[start_index].1.iter().copied()); + } else { + let mut acc_hash: H256 = H256::zero(); + // This search could potentially be expensive, but it's something that should happen 
very + // infrequently (only when we encounter an account we think it's big but it's not). In + // normal cases the vec we are iterating over just has one element (the big account). + for account in accounts_by_root_hash[remaining_start].1.iter() { + if let Some((_, old_intervals)) = account_storage_roots + .accounts_with_storage_root + .get(&account) + { + if !old_intervals.is_empty() { + acc_hash = *account; + } + } else { + continue; + } + } + if acc_hash.is_zero() { + panic!("Should have found the account hash"); + } + let (_, old_intervals) = account_storage_roots + .accounts_with_storage_root + .get_mut(&acc_hash) + .ok_or(PeerHandlerError::UnrecoverableError("Tried to get the old download intervals for an account but did not find them".to_owned()))?; + old_intervals.remove( + old_intervals + .iter() + .position(|(_old_start, end)| end == &hash_end) + .ok_or(PeerHandlerError::UnrecoverableError( + "Could not find an old interval that we were tracking" + .to_owned(), + ))?, + ); + if old_intervals.is_empty() { + for account in accounts_by_root_hash[remaining_start].1.iter() { + accounts_done.insert(*account, vec![]); + } + } } } else { if remaining_start + 1 < remaining_end { @@ -1471,27 +1537,96 @@ impl PeerHandler { let chunk_count = (missing_storage_range / chunk_size).as_usize().max(1); - for i in 0..chunk_count { - let start_hash_u256 = start_hash_u256 + chunk_size * i; - let start_hash = H256::from_uint(&start_hash_u256); - let end_hash = if i == chunk_count - 1 { - H256::repeat_byte(0xff) + let maybe_old_intervals = account_storage_roots + .accounts_with_storage_root + .get(&accounts_by_root_hash[remaining_start].1[0]); + + if let Some((_, old_intervals)) = maybe_old_intervals { + if !old_intervals.is_empty() { + for (start_hash, end_hash) in old_intervals { + let task = StorageTask { + start_index: remaining_start, + end_index: remaining_start + 1, + start_hash: *start_hash, + end_hash: Some(*end_hash), + }; + + tasks_queue_not_started.push_back(task); 
+ task_count += 1; + } } else { - let end_hash_u256 = - start_hash_u256.checked_add(chunk_size).unwrap_or(U256::MAX); - H256::from_uint(&end_hash_u256) - }; - - let task = StorageTask { - start_index: remaining_start, - end_index: remaining_start + 1, - start_hash, - end_hash: Some(end_hash), - }; - tasks_queue_not_started.push_back(task); - task_count += 1; + // TODO: DRY + account_storage_roots.accounts_with_storage_root.insert( + accounts_by_root_hash[remaining_start].1[0], + (None, vec![]), + ); + let (_, intervals) = account_storage_roots + .accounts_with_storage_root + .get_mut(&accounts_by_root_hash[remaining_start].1[0]) + .ok_or(PeerHandlerError::UnrecoverableError("Tried to get the old download intervals for an account but did not find them".to_owned()))?; + + for i in 0..chunk_count { + let start_hash_u256 = start_hash_u256 + chunk_size * i; + let start_hash = H256::from_uint(&start_hash_u256); + let end_hash = if i == chunk_count - 1 { + H256::repeat_byte(0xff) + } else { + let end_hash_u256 = start_hash_u256 + .checked_add(chunk_size) + .unwrap_or(U256::MAX); + H256::from_uint(&end_hash_u256) + }; + + let task = StorageTask { + start_index: remaining_start, + end_index: remaining_start + 1, + start_hash, + end_hash: Some(end_hash), + }; + + intervals.push((start_hash, end_hash)); + + tasks_queue_not_started.push_back(task); + task_count += 1; + } + debug!("Split big storage account into {chunk_count} chunks."); + } + } else { + account_storage_roots.accounts_with_storage_root.insert( + accounts_by_root_hash[remaining_start].1[0], + (None, vec![]), + ); + let (_, intervals) = account_storage_roots + .accounts_with_storage_root + .get_mut(&accounts_by_root_hash[remaining_start].1[0]) + .ok_or(PeerHandlerError::UnrecoverableError("Trie to get the old download intervals for an account but did not find them".to_owned()))?; + + for i in 0..chunk_count { + let start_hash_u256 = start_hash_u256 + chunk_size * i; + let start_hash = 
H256::from_uint(&start_hash_u256); + let end_hash = if i == chunk_count - 1 { + H256::repeat_byte(0xff) + } else { + let end_hash_u256 = start_hash_u256 + .checked_add(chunk_size) + .unwrap_or(U256::MAX); + H256::from_uint(&end_hash_u256) + }; + + let task = StorageTask { + start_index: remaining_start, + end_index: remaining_start + 1, + start_hash, + end_hash: Some(end_hash), + }; + + intervals.push((start_hash, end_hash)); + + tasks_queue_not_started.push_back(task); + task_count += 1; + } + debug!("Split big storage account into {chunk_count} chunks."); } - debug!("Split big storage account into {chunk_count} chunks."); } } @@ -1619,10 +1754,12 @@ impl PeerHandler { .collect::, DumpError>>() .map_err(PeerHandlerError::DumpError)?; - for account_done in accounts_done { - account_storage_roots - .accounts_with_storage_root - .remove(&account_done); + for (account_done, intervals) in accounts_done { + if intervals.is_empty() { + account_storage_roots + .accounts_with_storage_root + .remove(&account_done); + } } // Dropping the task sender so that the recv returns None @@ -2015,6 +2152,8 @@ pub enum PeerHandlerError { NoResponseFromPeer, #[error("Dumping snapshots to disk failed {0:?}")] DumpError(DumpError), + #[error("Encountered an unexpected error. 
This is a bug {0}")] + UnrecoverableError(String), } #[derive(Debug, Clone, std::hash::Hash)] diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index c0c19f0305f..934f61f5b54 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -905,7 +905,7 @@ impl Syncer { .zip(account_states.iter()) .filter_map(|(hash, state)| { (state.storage_root != *EMPTY_TRIE_HASH) - .then_some((*hash, state.storage_root)) + .then_some((*hash, (Some(state.storage_root), vec![]))) }), ); @@ -999,6 +999,7 @@ impl Syncer { account_storages_snapshots_dir.as_ref(), chunk_index, &mut pivot_header, + store.clone(), ) .await .map_err(SyncError::PeerHandler)?; @@ -1351,11 +1352,12 @@ pub fn calculate_staleness_timestamp(timestamp: u64) -> u64 { timestamp + (SNAP_LIMIT as u64 * 12) } #[derive(Debug, Default)] +#[allow(clippy::type_complexity)] /// We store for optimization the accounts that need to heal storage pub struct AccountStorageRoots { /// The accounts that have not been healed are guaranteed to have the original storage root /// we can read this storage root - pub accounts_with_storage_root: BTreeMap, + pub accounts_with_storage_root: BTreeMap, Vec<(H256, H256)>)>, /// If an account has been healed, it may return to a previous state, so we just store the account /// in a hashset pub healed_accounts: HashSet, diff --git a/crates/networking/p2p/sync/state_healing.rs b/crates/networking/p2p/sync/state_healing.rs index 9ea8c6caa11..08214734b99 100644 --- a/crates/networking/p2p/sync/state_healing.rs +++ b/crates/networking/p2p/sync/state_healing.rs @@ -181,9 +181,12 @@ async fn heal_state_trie( } storage_accounts.healed_accounts.insert(account_hash); - storage_accounts + let old_value = storage_accounts .accounts_with_storage_root - .remove(&account_hash); + .get_mut(&account_hash); + if let Some((old_root, _)) = old_value { + *old_root = None; + } } } leafs_healed += nodes diff --git a/crates/networking/p2p/sync/storage_healing.rs 
b/crates/networking/p2p/sync/storage_healing.rs index 13474c46490..a331951d770 100644 --- a/crates/networking/p2p/sync/storage_healing.rs +++ b/crates/networking/p2p/sync/storage_healing.rs @@ -543,26 +543,6 @@ fn get_initial_downloads( }) .collect::>(), ); - initial_requests.extend( - account_paths - .accounts_with_storage_root - .par_iter() - .filter_map(|(acc_path, storage_root)| { - if store - .contains_storage_node(*acc_path, *storage_root) - .expect("We should be able to open the store") - { - return None; - } - Some(NodeRequest { - acc_path: Nibbles::from_bytes(&acc_path.0), - storage_path: Nibbles::default(), // We need to be careful, the root parent is a special case - parent: Nibbles::default(), - hash: *storage_root, - }) - }) - .collect::>(), - ); initial_requests } From af4c4e41b5770bfcaa5fc86d56b3946313482294 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Fri, 26 Sep 2025 14:28:24 -0300 Subject: [PATCH 057/115] 2 pools --- crates/networking/p2p/sync.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index d3b368c36aa..cce496a5076 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1691,14 +1691,22 @@ async fn insert_storage_into_rocksdb( scope(|scope| { use std::num::NonZero; - let pool = Arc::new(ThreadPool::new( + let process_pool = ThreadPool::new( std::thread::available_parallelism() .map(|num| num.into()) - .unwrap_or(8), + .unwrap_or(8) + / 2, + scope, + ); + let write_pool = Arc::new(ThreadPool::new( + std::thread::available_parallelism() + .map(|num| num.into()) + .unwrap_or(8) + / 2, scope, )); for (account_hash, trie) in account_with_storage_and_tries.iter() { - let pool_clone = pool.clone(); + let write_pool = write_pool.clone(); let mut iter = db.raw_iterator(); let task = Box::new(move || { let mut initial_key = account_hash.as_bytes().to_vec(); @@ -1712,7 +1720,7 @@ async fn 
insert_storage_into_rocksdb( let result = trie_from_sorted_accounts( trie.db(), &mut iter, - pool_clone + write_pool ) .inspect_err(|err: &TrieGenerationError| { error!( @@ -1723,7 +1731,7 @@ async fn insert_storage_into_rocksdb( METRICS.storage_tries_state_roots_computed.inc(); result; }); - pool.execute(task); + process_pool.execute(task); } }); Ok(()) From 888472abcf3428a77fb742752d8d7c008648ce3b Mon Sep 17 00:00:00 2001 From: Javier Chatruc Date: Fri, 26 Sep 2025 15:03:05 -0300 Subject: [PATCH 058/115] Comment out debug! on insertion for now --- crates/networking/p2p/sync.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 934f61f5b54..45f77b10f46 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1076,11 +1076,11 @@ impl Syncer { pivot_hash_moved, ); let duration = Instant::now() - start; - debug!( - duration = duration.as_micros(), - keys = n_keys, - "Computed Storage Root" - ); + // debug!( + // duration = duration.as_micros(), + // keys = n_keys, + // "Computed Storage Root" + // ); changes }) .collect::, SyncError>>() From b75034f36a2939ec8f476c65fab9805cee534995 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Fri, 26 Sep 2025 15:33:22 -0300 Subject: [PATCH 059/115] Testing priority queue --- crates/common/trie/trie_sorted.rs | 6 ++---- pool_test/src/main.rs | 14 +++++++------ pool_test/src/myscope.rs | 34 +++++++++++++++++++++++-------- 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index c61dd0b43a4..e291af4ec33 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -163,7 +163,7 @@ where while let Some(right_side) = right_side_opt { if nodes_to_write.len() as u64 > SIZE_TO_WRITE_DB { - scope.execute(Box::new(move || { + scope.execute_priority(Box::new(move || { let _ = 
flush_nodes_to_write(nodes_to_write, db); })); nodes_to_write = Vec::with_capacity(SIZE_TO_WRITE_DB as usize + 65); @@ -267,9 +267,7 @@ where .finalize() }; - scope.execute(Box::new(move || { - let _ = flush_nodes_to_write(nodes_to_write, db); - })); + flush_nodes_to_write(nodes_to_write, db); Ok(hash) } diff --git a/pool_test/src/main.rs b/pool_test/src/main.rs index 339a8a8de6e..30452c02639 100644 --- a/pool_test/src/main.rs +++ b/pool_test/src/main.rs @@ -1,4 +1,8 @@ -use std::{sync::Arc, thread::scope}; +use std::{ + sync::Arc, + thread::{scope, sleep}, + time::Duration, +}; use crate::myscope::ThreadPool; @@ -7,19 +11,17 @@ pub mod myscope; fn main() { println!("Start"); scope(|s| { - let pool = ThreadPool::new(4, s); + let pool = ThreadPool::new(1, s); let pool_arc = Arc::new(pool); let pool_arc_2 = pool_arc.clone(); pool_arc.execute(Box::new(move || { - let x = 1; + sleep(Duration::from_secs(1)); println!("Inside, Inside, world!"); pool_arc_2.execute(Box::new(move || { - let x = 3; println!("3, world!"); })); })); - pool_arc.execute(Box::new(move || { - let x = 2; + pool_arc.execute_priority(Box::new(move || { println!("Inside, world!"); })); }); diff --git a/pool_test/src/myscope.rs b/pool_test/src/myscope.rs index ab23b5c1b25..8aa4991ccc8 100644 --- a/pool_test/src/myscope.rs +++ b/pool_test/src/myscope.rs @@ -1,32 +1,50 @@ -use crossbeam::channel::{Receiver, Sender, unbounded}; +use crossbeam::channel::{Receiver, Select, Sender, select_biased, unbounded}; use std::marker::Send; use std::sync::{Arc, Mutex}; use std::thread::{Builder, Scope}; pub struct ThreadPool<'scope> { - task_queue_sender: Sender>, // Implictly our threads in the thread pool have the receiver + priority_sender: Sender>, // Implictly our threads in the thread pool have the receiver + nice_sender: Sender>, // Implictly our threads in the thread pool have the receiver } impl<'scope> ThreadPool<'scope> { pub fn new(thread_count: usize, scope: &'scope Scope<'scope, '_>) -> Self { - let 
(task_queue_sender, receiver) = unbounded::>(); + let (priority_sender, priority_receiver) = unbounded::>(); + let (nice_sender, nice_receiver) = unbounded::>(); for i in 0..thread_count { - let task_queue_rx_clone = receiver.clone(); + let priority_receiver = priority_receiver.clone(); + let nice_receiver = nice_receiver.clone(); let _ = Builder::new() .name(format!("ThreadPool {i}")) .spawn_scoped(scope, move || { // Thread work goes here - while let Ok(task) = task_queue_rx_clone.recv() { + while let Ok(task) = select_biased! { + recv(priority_receiver) -> msg => msg, + recv(nice_receiver) -> msg => msg, + } { + task(); + } + while let Ok(task) = priority_receiver.recv() { + task(); + } + while let Ok(task) = nice_receiver.recv() { task(); } }); } - - ThreadPool { task_queue_sender } + ThreadPool { + priority_sender, + nice_sender, + } } pub fn execute(&self, task: Box) { - self.task_queue_sender.send(task).unwrap(); + self.nice_sender.send(task).unwrap(); + } + + pub fn execute_priority(&self, task: Box) { + self.priority_sender.send(task).unwrap(); } } From a7e20846fd8de1980114b5174fde98edda4df1fd Mon Sep 17 00:00:00 2001 From: Javier Chatruc Date: Fri, 26 Sep 2025 15:50:24 -0300 Subject: [PATCH 060/115] Add TODO --- crates/networking/p2p/peer_handler.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index 5b29eb50e6c..6efb8ba26c3 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -1308,6 +1308,7 @@ impl PeerHandler { } } let mut accounts_by_root_hash = Vec::from_iter(accounts_by_root_hash); + // TODO: Turn this into a stable sort for binary search. 
accounts_by_root_hash.sort_unstable_by_key(|(_, accounts)| !accounts.len()); let chunk_size = 300; let chunk_count = (accounts_by_root_hash.len() / chunk_size) + 1; From 08fbdf6592e476ea95c0ce64ccdaf696af47d1a2 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Fri, 26 Sep 2025 15:55:01 -0300 Subject: [PATCH 061/115] Update sync.rs --- crates/networking/p2p/sync.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index cce496a5076..72c50dd7dd4 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1675,6 +1675,8 @@ async fn insert_storage_into_rocksdb( .collect(); db.ingest_external_file(file_paths) .map_err(|err| SyncError::RocksDBError(err.into_string()))?; + db.compact_range(Option::<&[u8]>::None, Option::<&[u8]>::None); + let snapshot = db.snapshot(); let account_with_storage_and_tries = accounts_with_storage .into_iter() @@ -1707,7 +1709,7 @@ async fn insert_storage_into_rocksdb( )); for (account_hash, trie) in account_with_storage_and_tries.iter() { let write_pool = write_pool.clone(); - let mut iter = db.raw_iterator(); + let mut iter = snapshot.raw_iterator(); let task = Box::new(move || { let mut initial_key = account_hash.as_bytes().to_vec(); initial_key.extend([0_u8; 32]); From 07b8ea558695c6e7da81dd65412ba44b6ea2c53f Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Fri, 26 Sep 2025 15:57:28 -0300 Subject: [PATCH 062/115] Update sync.rs --- crates/networking/p2p/sync.rs | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 72c50dd7dd4..df4e3e1333a 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1693,22 +1693,14 @@ async fn insert_storage_into_rocksdb( scope(|scope| { use std::num::NonZero; - let process_pool = ThreadPool::new( + let pool: Arc> = Arc::new(ThreadPool::new( 
std::thread::available_parallelism() .map(|num| num.into()) - .unwrap_or(8) - / 2, - scope, - ); - let write_pool = Arc::new(ThreadPool::new( - std::thread::available_parallelism() - .map(|num| num.into()) - .unwrap_or(8) - / 2, + .unwrap_or(8), scope, )); for (account_hash, trie) in account_with_storage_and_tries.iter() { - let write_pool = write_pool.clone(); + let pool_clone = pool.clone(); let mut iter = snapshot.raw_iterator(); let task = Box::new(move || { let mut initial_key = account_hash.as_bytes().to_vec(); @@ -1722,7 +1714,7 @@ async fn insert_storage_into_rocksdb( let result = trie_from_sorted_accounts( trie.db(), &mut iter, - write_pool + pool_clone ) .inspect_err(|err: &TrieGenerationError| { error!( @@ -1733,7 +1725,7 @@ async fn insert_storage_into_rocksdb( METRICS.storage_tries_state_roots_computed.inc(); result; }); - process_pool.execute(task); + pool.execute(task); } }); Ok(()) From ec81b864f3cd3ea5ee5752ba7658db61d0aa9ca6 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Fri, 26 Sep 2025 17:54:31 -0300 Subject: [PATCH 063/115] Compiles --- Cargo.lock | 1 + crates/networking/p2p/Cargo.toml | 1 + crates/networking/p2p/sync.rs | 14 +++++++++----- crates/networking/p2p/utils.rs | 17 ++++++++++------- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 67c70e47da9..e376329b53b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4332,6 +4332,7 @@ dependencies = [ "hex", "hex-literal", "hmac", + "itertools 0.14.0", "lazy_static", "pool_test", "prometheus 0.14.0", diff --git a/crates/networking/p2p/Cargo.toml b/crates/networking/p2p/Cargo.toml index 432c70b7bd1..1ece3951568 100644 --- a/crates/networking/p2p/Cargo.toml +++ b/crates/networking/p2p/Cargo.toml @@ -34,6 +34,7 @@ futures.workspace = true cfg-if.workspace = true rocksdb = { workspace = true, optional = true } prometheus = "0.14.0" +itertools = "0.14.0" pool_test.workspace = true tokio-stream = "0.1.17" diff --git a/crates/networking/p2p/sync.rs 
b/crates/networking/p2p/sync.rs index ba0dd6ddfe6..7a4cbab923e 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1469,7 +1469,8 @@ async fn insert_accounts_into_db( storage_accounts.accounts_with_storage_root.extend( account_states_snapshot.iter().filter_map(|(hash, state)| { - (state.storage_root != *EMPTY_TRIE_HASH).then_some((*hash, state.storage_root)) + (state.storage_root != *EMPTY_TRIE_HASH) + .then_some((*hash, (Some(state.storage_root), Vec::new()))) }), ); @@ -1570,7 +1571,9 @@ async fn insert_storages_into_db( .await??; info!("Writing to db"); - store.write_storage_trie_nodes_batch(storage_trie_node_changes) + store + .write_storage_trie_nodes_batch(storage_trie_node_changes) + .await?; } Ok(()) } @@ -1620,9 +1623,10 @@ async fn insert_accounts_into_rocksdb( .fetch_add(1, Ordering::Relaxed); let account_state = AccountState::decode(v).expect("We should have accounts here"); if account_state.storage_root != *EMPTY_TRIE_HASH { - storage_accounts - .accounts_with_storage_root - .insert(H256::from_slice(k), account_state.storage_root); + storage_accounts.accounts_with_storage_root.insert( + H256::from_slice(k), + (Some(account_state.storage_root), Vec::new()), + ); } }) .map(|(k, v)| (H256::from_slice(&k), v.to_vec())), diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index 7d3c8119ad6..2ab11d9dde2 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -8,6 +8,7 @@ use ethrex_common::utils::keccak; use ethrex_common::{H256, H512, U256, types::AccountState}; use ethrex_rlp::{encode::RLPEncode, error::RLPDecodeError}; use ethrex_trie::Node; +use itertools::{Itertools, iproduct}; use secp256k1::{PublicKey, SecretKey}; use spawned_concurrency::error::GenServerError; @@ -147,7 +148,7 @@ pub fn dump_accounts_to_file( pub fn dump_storages_to_file( path: &Path, - storages: Vec<(H256, Vec<(H256, U256)>)>, + storages: Vec<(Vec, Vec<(H256, U256)>)>, ) -> Result<(), DumpError> { 
cfg_if::cfg_if! { if #[cfg(feature = "rocksdb")] { @@ -155,12 +156,14 @@ pub fn dump_storages_to_file( path, storages .into_iter() - .flat_map(|(hash, slots)| { - slots.into_iter().map(move |(slot_hash, slot_value)| { - let key = [hash.as_bytes(), slot_hash.as_bytes()].concat(); - (key, slot_value.encode_to_vec()) - }) - }).collect::>() + .flat_map(|(accounts, slots)| { + accounts.into_iter().map(|hash| { + slots.iter().map(move |(slot_hash, slot_value)| { + let key = [hash.as_bytes(), slot_hash.as_bytes()].concat(); + (key, slot_value.encode_to_vec()) + }).collect::>() + }).collect::>() + }).flatten().collect::>() ) .inspect_err(|err| error!("Rocksdb writing stt error {err:?}")) .map_err(|_| DumpError { From 7a7e0f346bf6c483e9d5e7c360017193f9437666 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Fri, 26 Sep 2025 18:52:21 -0300 Subject: [PATCH 064/115] Update sync.rs --- crates/networking/p2p/sync.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 7a4cbab923e..8293a0c0f29 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1707,6 +1707,8 @@ async fn insert_storage_into_rocksdb( }) .collect::>(); + use tokio::sync::Semaphore; + let semaphore = Semaphore::new(10_000); scope(|scope| { use std::num::NonZero; @@ -1717,8 +1719,14 @@ async fn insert_storage_into_rocksdb( scope, )); for (account_hash, trie) in account_with_storage_and_tries.iter() { + use tokio::runtime::Handle; + let pool_clone = pool.clone(); let mut iter = snapshot.raw_iterator(); + let permit = semaphore.acquire(); + let permit = Handle::current() + .block_on(permit) + .expect("Should get a permit"); let task = Box::new(move || { let mut initial_key = account_hash.as_bytes().to_vec(); initial_key.extend([0_u8; 32]); @@ -1728,7 +1736,7 @@ async fn insert_storage_into_rocksdb( limit: *account_hash, }; - let result = trie_from_sorted_accounts( + let _ = 
trie_from_sorted_accounts( trie.db(), &mut iter, pool_clone @@ -1740,7 +1748,7 @@ async fn insert_storage_into_rocksdb( }) .map_err(SyncError::TrieGenerationError); METRICS.storage_tries_state_roots_computed.inc(); - result; + permit.forget(); }); pool.execute(task); } From 4e3621b7f7fb7de2c705bb94a8d28dbe95907c15 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Fri, 26 Sep 2025 19:01:31 -0300 Subject: [PATCH 065/115] Simple semaphore --- Cargo.lock | 1 + crates/networking/p2p/Cargo.toml | 1 + crates/networking/p2p/sync.rs | 23 ++++++++++++----------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e376329b53b..46bea6c4536 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4320,6 +4320,7 @@ dependencies = [ "bytes", "cfg-if 1.0.3", "concat-kdf", + "crossbeam 0.8.4", "ctr", "ethereum-types 0.15.1", "ethrex-blockchain", diff --git a/crates/networking/p2p/Cargo.toml b/crates/networking/p2p/Cargo.toml index 1ece3951568..d2ac7d36706 100644 --- a/crates/networking/p2p/Cargo.toml +++ b/crates/networking/p2p/Cargo.toml @@ -50,6 +50,7 @@ ctr = "0.9.2" rand = "0.8.5" rayon = "1.10.0" +crossbeam = "0.8.4" [dev-dependencies] hex-literal = "0.4.1" diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 8293a0c0f29..d83f432f886 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1707,11 +1707,12 @@ async fn insert_storage_into_rocksdb( }) .collect::>(); - use tokio::sync::Semaphore; - let semaphore = Semaphore::new(10_000); - scope(|scope| { - use std::num::NonZero; + use crossbeam::channel::unbounded; + + let (sender, receiver) = unbounded::<()>(); + let mut counter = 0; + scope(|scope| { let pool: Arc> = Arc::new(ThreadPool::new( std::thread::available_parallelism() .map(|num| num.into()) @@ -1719,14 +1720,14 @@ async fn insert_storage_into_rocksdb( scope, )); for (account_hash, trie) in account_with_storage_and_tries.iter() { - use tokio::runtime::Handle; - + let sender 
= sender.clone(); + if counter > 100_000 { + let _ = receiver.recv(); + counter -= 1; + } + counter += 1; let pool_clone = pool.clone(); let mut iter = snapshot.raw_iterator(); - let permit = semaphore.acquire(); - let permit = Handle::current() - .block_on(permit) - .expect("Should get a permit"); let task = Box::new(move || { let mut initial_key = account_hash.as_bytes().to_vec(); initial_key.extend([0_u8; 32]); @@ -1748,7 +1749,7 @@ async fn insert_storage_into_rocksdb( }) .map_err(SyncError::TrieGenerationError); METRICS.storage_tries_state_roots_computed.inc(); - permit.forget(); + let _ = sender.send(()); }); pool.execute(task); } From 193327df7411eb015f7660171999f8eede898dda Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Fri, 26 Sep 2025 19:07:51 -0300 Subject: [PATCH 066/115] Update sync.rs --- crates/networking/p2p/sync.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index d83f432f886..c19df92d8aa 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1729,9 +1729,9 @@ async fn insert_storage_into_rocksdb( let pool_clone = pool.clone(); let mut iter = snapshot.raw_iterator(); let task = Box::new(move || { - let mut initial_key = account_hash.as_bytes().to_vec(); - initial_key.extend([0_u8; 32]); - iter.seek(initial_key); + let mut buffer: [u8; 64] = [0_u8; 64]; + buffer[..32].copy_from_slice(&account_hash.0); + iter.seek(buffer); let mut iter = RocksDBIterator { iter, limit: *account_hash, From fa9ac95a3695e1effbe0bf8eb44ce16fa058d642 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Mon, 29 Sep 2025 12:48:31 -0300 Subject: [PATCH 067/115] Changed to fixed amount of buffers --- Cargo.lock | 1 + Cargo.toml | 1 + crates/common/trie/Cargo.toml | 1 + crates/common/trie/db.rs | 2 +- crates/common/trie/trie_sorted.rs | 40 ++++++++++++++++++++++++------- crates/networking/p2p/Cargo.toml | 2 +- crates/networking/p2p/sync.rs | 
27 ++++++++++++++------- crates/storage/trie_db/rocksdb.rs | 5 +++- pool_test/Cargo.toml | 2 +- 9 files changed, 59 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 46bea6c4536..fc69b61f386 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4592,6 +4592,7 @@ dependencies = [ "bytes", "cita_trie", "criterion", + "crossbeam 0.8.4", "digest 0.10.7", "ethereum-types 0.15.1", "ethrex-rlp", diff --git a/Cargo.toml b/Cargo.toml index 710e24ac12d..c996a67f36f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -120,6 +120,7 @@ spawned-concurrency = "0.4.0" spawned-rt = "0.4.0" lambdaworks-crypto = "0.11.0" tui-logger = { version = "0.17.3", features = ["tracing-support"] } +crossbeam = "0.8.4" rayon = "1.10.0" rkyv = { version = "0.8.10", features = ["std", "unaligned"] } tempfile = "3.8" diff --git a/crates/common/trie/Cargo.toml b/crates/common/trie/Cargo.toml index 4b4e0b4f2fc..3da1710b85b 100644 --- a/crates/common/trie/Cargo.toml +++ b/crates/common/trie/Cargo.toml @@ -24,6 +24,7 @@ digest = "0.10.6" lazy_static.workspace = true rayon.workspace = true pool_test.workspace = true +crossbeam.workspace = true [features] default = [] diff --git a/crates/common/trie/db.rs b/crates/common/trie/db.rs index d32c8708859..c7199ebbe6f 100644 --- a/crates/common/trie/db.rs +++ b/crates/common/trie/db.rs @@ -10,7 +10,7 @@ use std::{ pub trait TrieDB: Send + Sync { fn get(&self, key: NodeHash) -> Result>, TrieError>; fn put_batch(&self, key_values: Vec<(NodeHash, Vec)>) -> Result<(), TrieError>; - fn put_batch_no_alloc(&self, key_values: Vec) -> Result<(), TrieError> { + fn put_batch_no_alloc(&self, key_values: &Vec) -> Result<(), TrieError> { self.put_batch( key_values .into_iter() diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index e291af4ec33..c8ffa5107cf 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -2,6 +2,7 @@ use crate::{ EMPTY_TRIE_HASH, Nibbles, Node, TrieDB, TrieError, 
node::{BranchNode, ExtensionNode, LeafNode}, }; +use crossbeam::channel::{Receiver, Sender, bounded, unbounded}; use ethereum_types::H256; use pool_test::ThreadPool; use std::{sync::Arc, thread::scope}; @@ -124,11 +125,15 @@ fn add_center_to_parent_and_write_queue( } fn flush_nodes_to_write( - nodes_to_write: Vec, + mut nodes_to_write: Vec, db: &dyn TrieDB, + sender: Sender>, ) -> Result<(), TrieGenerationError> { - db.put_batch_no_alloc(nodes_to_write) - .map_err(TrieGenerationError::FlushToDbError) + db.put_batch_no_alloc(&nodes_to_write) + .map_err(TrieGenerationError::FlushToDbError)?; + nodes_to_write.clear(); + sender.send(nodes_to_write); + Ok(()) } #[inline(never)] @@ -136,11 +141,15 @@ pub fn trie_from_sorted_accounts<'scope, T>( db: &'scope dyn TrieDB, data_iter: &mut T, scope: Arc>, + buffer_sender: Sender>, + buffer_receiver: Receiver>, ) -> Result where T: Iterator)> + Send, { - let mut nodes_to_write: Vec = Vec::with_capacity(SIZE_TO_WRITE_DB as usize + 65); + let mut nodes_to_write: Vec = buffer_receiver + .recv() + .expect("This channel shouldn't close"); let mut trie_stack: Vec = Vec::with_capacity(64); // Optimized for H256 let mut left_side = StackElement::default(); @@ -157,16 +166,19 @@ where value: initial_value.1, }; let hash = node.compute_hash().finalize(); - flush_nodes_to_write(vec![node.into()], db)?; + flush_nodes_to_write(vec![node.into()], db, buffer_sender)?; return Ok(hash); } while let Some(right_side) = right_side_opt { if nodes_to_write.len() as u64 > SIZE_TO_WRITE_DB { + let buffer_sender = buffer_sender.clone(); scope.execute_priority(Box::new(move || { - let _ = flush_nodes_to_write(nodes_to_write, db); + let _ = flush_nodes_to_write(nodes_to_write, db, buffer_sender); })); - nodes_to_write = Vec::with_capacity(SIZE_TO_WRITE_DB as usize + 65); + nodes_to_write = buffer_receiver + .recv() + .expect("This channel shouldn't close"); } let right_side_path = Nibbles::from_bytes(right_side.0.as_bytes()); @@ -267,7 +279,7 @@ where 
.finalize() }; - flush_nodes_to_write(nodes_to_write, db); + flush_nodes_to_write(nodes_to_write, db, buffer_sender); Ok(hash) } @@ -278,9 +290,19 @@ pub fn trie_from_sorted_accounts_wrap( where T: Iterator)> + Send, { + let (buffer_sender, buffer_receiver) = bounded::>(1001); + for _ in 0..1_000 { + buffer_sender.send(Vec::with_capacity(20_065)); + } scope(|s| { let pool = ThreadPool::new(12, s); - trie_from_sorted_accounts(db, accounts_iter, Arc::new(pool)) + trie_from_sorted_accounts( + db, + accounts_iter, + Arc::new(pool), + buffer_sender, + buffer_receiver, + ) }) } diff --git a/crates/networking/p2p/Cargo.toml b/crates/networking/p2p/Cargo.toml index d2ac7d36706..28b86346eda 100644 --- a/crates/networking/p2p/Cargo.toml +++ b/crates/networking/p2p/Cargo.toml @@ -50,7 +50,7 @@ ctr = "0.9.2" rand = "0.8.5" rayon = "1.10.0" -crossbeam = "0.8.4" +crossbeam.workspace = true [dev-dependencies] hex-literal = "0.4.1" diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index c19df92d8aa..33a5fe29fc8 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1707,21 +1707,28 @@ async fn insert_storage_into_rocksdb( }) .collect::>(); - use crossbeam::channel::unbounded; + use crossbeam::channel::{bounded, unbounded}; + + use ethrex_trie::Node; let (sender, receiver) = unbounded::<()>(); let mut counter = 0; + let thread_count = std::thread::available_parallelism() + .map(|num| num.into()) + .unwrap_or(8); + + let (buffer_sender, buffer_receiver) = bounded::>(1001); + for _ in 0..1_000 { + buffer_sender.send(Vec::with_capacity(20_065)); + } scope(|scope| { - let pool: Arc> = Arc::new(ThreadPool::new( - std::thread::available_parallelism() - .map(|num| num.into()) - .unwrap_or(8), - scope, - )); + let pool: Arc> = Arc::new(ThreadPool::new(thread_count, scope)); for (account_hash, trie) in account_with_storage_and_tries.iter() { let sender = sender.clone(); - if counter > 100_000 { + let buffer_sender = buffer_sender.clone(); 
+ let buffer_receiver = buffer_receiver.clone(); + if counter >= thread_count - 1 { let _ = receiver.recv(); counter -= 1; } @@ -1740,7 +1747,9 @@ async fn insert_storage_into_rocksdb( let _ = trie_from_sorted_accounts( trie.db(), &mut iter, - pool_clone + pool_clone, + buffer_sender, + buffer_receiver, ) .inspect_err(|err: &TrieGenerationError| { error!( diff --git a/crates/storage/trie_db/rocksdb.rs b/crates/storage/trie_db/rocksdb.rs index 8060aa4b20f..91a3d00aa3d 100644 --- a/crates/storage/trie_db/rocksdb.rs +++ b/crates/storage/trie_db/rocksdb.rs @@ -81,7 +81,10 @@ impl TrieDB for RocksDBTrieDB { .map_err(|e| TrieError::DbError(anyhow::anyhow!("RocksDB batch write error: {}", e))) } - fn put_batch_no_alloc(&self, key_values: Vec) -> Result<(), TrieError> { + fn put_batch_no_alloc( + &self, + key_values: &std::vec::Vec, + ) -> Result<(), TrieError> { let cf = self.cf_handle()?; let mut batch = rocksdb::WriteBatch::default(); let mut buffer = Vec::with_capacity(300); diff --git a/pool_test/Cargo.toml b/pool_test/Cargo.toml index 35cf24655b8..1dcda389166 100644 --- a/pool_test/Cargo.toml +++ b/pool_test/Cargo.toml @@ -7,4 +7,4 @@ edition = "2024" path = "src/myscope.rs" [dependencies] -crossbeam = "0.8.4" +crossbeam.workspace = true From e0ec83640987c946dd881a1e9d4ffc823b056e06 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Mon, 29 Sep 2025 14:32:45 -0300 Subject: [PATCH 068/115] Removed unneeded compaction --- crates/networking/p2p/sync.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 33a5fe29fc8..d3cffa4eb9f 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1692,7 +1692,6 @@ async fn insert_storage_into_rocksdb( .collect(); db.ingest_external_file(file_paths) .map_err(|err| SyncError::RocksDBError(err.into_string()))?; - db.compact_range(Option::<&[u8]>::None, Option::<&[u8]>::None); let snapshot = db.snapshot(); let 
account_with_storage_and_tries = accounts_with_storage From c93aa46d31becd29f654a5e362d584959f6c9567 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Mon, 29 Sep 2025 15:11:31 -0300 Subject: [PATCH 069/115] cleanup --- Cargo.lock | 18 ++++----- Cargo.toml | 4 +- crates/common/trie/Cargo.toml | 2 +- crates/common/trie/db.rs | 4 +- crates/common/trie/trie_sorted.rs | 10 ++--- {pool_test => crates/concurrency}/Cargo.lock | 0 {pool_test => crates/concurrency}/Cargo.toml | 4 +- .../concurrency/concurrency.rs | 5 ++- crates/networking/p2p/Cargo.toml | 2 +- crates/networking/p2p/peer_handler.rs | 36 +++++++++-------- crates/networking/p2p/sync.rs | 39 ++++++++++--------- crates/networking/p2p/utils.rs | 19 ++++++--- crates/storage/trie_db/rocksdb.rs | 5 +-- pool_test/src/main.rs | 29 -------------- 14 files changed, 79 insertions(+), 98 deletions(-) rename {pool_test => crates/concurrency}/Cargo.lock (100%) rename {pool_test => crates/concurrency}/Cargo.toml (65%) rename pool_test/src/myscope.rs => crates/concurrency/concurrency.rs (89%) delete mode 100644 pool_test/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index fc69b61f386..bd3901ba537 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4328,6 +4328,7 @@ dependencies = [ "ethrex-rlp", "ethrex-storage", "ethrex-storage-rollup", + "ethrex-threadpool", "ethrex-trie", "futures", "hex", @@ -4335,7 +4336,6 @@ dependencies = [ "hmac", "itertools 0.14.0", "lazy_static", - "pool_test", "prometheus 0.14.0", "rand 0.8.5", "rayon", @@ -4584,6 +4584,13 @@ dependencies = [ "tracing", ] +[[package]] +name = "ethrex-threadpool" +version = "0.1.0" +dependencies = [ + "crossbeam 0.8.4", +] + [[package]] name = "ethrex-trie" version = "0.1.0" @@ -4596,12 +4603,12 @@ dependencies = [ "digest 0.10.7", "ethereum-types 0.15.1", "ethrex-rlp", + "ethrex-threadpool", "hasher", "hex", "hex-literal", "lazy_static", "libmdbx", - "pool_test", "proptest", "rand 0.8.5", "rayon", @@ -8613,13 +8620,6 @@ dependencies = [ "miniz_oxide 0.8.9", ] 
-[[package]] -name = "pool_test" -version = "0.1.0" -dependencies = [ - "crossbeam 0.8.4", -] - [[package]] name = "portable-atomic" version = "1.11.1" diff --git a/Cargo.toml b/Cargo.toml index c996a67f36f..f79dff8d45a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,7 @@ members = [ "tooling/archive_sync", "tooling/replayer", "crates/common/config", - "pool_test", + "crates/concurrency", ] resolver = "2" @@ -76,7 +76,7 @@ ethrex-prover = { path = "./crates/l2/prover" } ethrex-storage-rollup = { path = "./crates/l2/storage" } ethrex = { path = "./cmd/ethrex" } ethrex-l2-rpc = { path = "./crates/l2/networking/rpc" } -pool_test = { path = "./pool_test" } +ethrex-threadpool = { path = "./crates/concurrency" } tracing = { version = "0.1", features = ["log"] } tracing-subscriber = { version = "0.3.0", features = ["env-filter"] } diff --git a/crates/common/trie/Cargo.toml b/crates/common/trie/Cargo.toml index 3da1710b85b..accf5a939ba 100644 --- a/crates/common/trie/Cargo.toml +++ b/crates/common/trie/Cargo.toml @@ -7,6 +7,7 @@ documentation.workspace = true [dependencies] ethrex-rlp.workspace = true +ethrex-threadpool.workspace = true ethereum-types.workspace = true anyhow = "1.0.86" @@ -23,7 +24,6 @@ smallvec = { version = "1.10.0", features = ["const_generics", "union"] } digest = "0.10.6" lazy_static.workspace = true rayon.workspace = true -pool_test.workspace = true crossbeam.workspace = true [features] diff --git a/crates/common/trie/db.rs b/crates/common/trie/db.rs index c7199ebbe6f..99964db9b9f 100644 --- a/crates/common/trie/db.rs +++ b/crates/common/trie/db.rs @@ -10,10 +10,10 @@ use std::{ pub trait TrieDB: Send + Sync { fn get(&self, key: NodeHash) -> Result>, TrieError>; fn put_batch(&self, key_values: Vec<(NodeHash, Vec)>) -> Result<(), TrieError>; - fn put_batch_no_alloc(&self, key_values: &Vec) -> Result<(), TrieError> { + fn put_batch_no_alloc(&self, key_values: &[Node]) -> Result<(), TrieError> { self.put_batch( key_values - .into_iter() + .iter() 
.map(|node| (node.compute_hash(), node.encode_to_vec())) .collect(), ) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index c8ffa5107cf..028bd33ed7a 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -2,9 +2,9 @@ use crate::{ EMPTY_TRIE_HASH, Nibbles, Node, TrieDB, TrieError, node::{BranchNode, ExtensionNode, LeafNode}, }; -use crossbeam::channel::{Receiver, Sender, bounded, unbounded}; +use crossbeam::channel::{Receiver, Sender, bounded}; use ethereum_types::H256; -use pool_test::ThreadPool; +use ethrex_threadpool::ThreadPool; use std::{sync::Arc, thread::scope}; use tracing::debug; @@ -132,7 +132,7 @@ fn flush_nodes_to_write( db.put_batch_no_alloc(&nodes_to_write) .map_err(TrieGenerationError::FlushToDbError)?; nodes_to_write.clear(); - sender.send(nodes_to_write); + let _ = sender.send(nodes_to_write); Ok(()) } @@ -279,7 +279,7 @@ where .finalize() }; - flush_nodes_to_write(nodes_to_write, db, buffer_sender); + let _ = flush_nodes_to_write(nodes_to_write, db, buffer_sender); Ok(hash) } @@ -292,7 +292,7 @@ where { let (buffer_sender, buffer_receiver) = bounded::>(1001); for _ in 0..1_000 { - buffer_sender.send(Vec::with_capacity(20_065)); + let _ = buffer_sender.send(Vec::with_capacity(20_065)); } scope(|s| { let pool = ThreadPool::new(12, s); diff --git a/pool_test/Cargo.lock b/crates/concurrency/Cargo.lock similarity index 100% rename from pool_test/Cargo.lock rename to crates/concurrency/Cargo.lock diff --git a/pool_test/Cargo.toml b/crates/concurrency/Cargo.toml similarity index 65% rename from pool_test/Cargo.toml rename to crates/concurrency/Cargo.toml index 1dcda389166..0ab63655ef9 100644 --- a/pool_test/Cargo.toml +++ b/crates/concurrency/Cargo.toml @@ -1,10 +1,10 @@ [package] -name = "pool_test" +name = "ethrex-threadpool" version = "0.1.0" edition = "2024" [lib] -path = "src/myscope.rs" +path = "concurrency.rs" [dependencies] crossbeam.workspace = true diff --git 
a/pool_test/src/myscope.rs b/crates/concurrency/concurrency.rs similarity index 89% rename from pool_test/src/myscope.rs rename to crates/concurrency/concurrency.rs index 8aa4991ccc8..fa263cc91b5 100644 --- a/pool_test/src/myscope.rs +++ b/crates/concurrency/concurrency.rs @@ -1,6 +1,5 @@ -use crossbeam::channel::{Receiver, Select, Sender, select_biased, unbounded}; +use crossbeam::channel::{Sender, select_biased, unbounded}; use std::marker::Send; -use std::sync::{Arc, Mutex}; use std::thread::{Builder, Scope}; pub struct ThreadPool<'scope> { @@ -26,6 +25,8 @@ impl<'scope> ThreadPool<'scope> { } { task(); } + // If one of the senders closes because the threadpool is dropped, the other one + // channel may still exist and have data while let Ok(task) = priority_receiver.recv() { task(); } diff --git a/crates/networking/p2p/Cargo.toml b/crates/networking/p2p/Cargo.toml index 28b86346eda..1982fc73136 100644 --- a/crates/networking/p2p/Cargo.toml +++ b/crates/networking/p2p/Cargo.toml @@ -14,6 +14,7 @@ ethrex-rlp.workspace = true ethrex-storage.workspace = true ethrex-trie.workspace = true ethrex-storage-rollup.workspace = true +ethrex-threadpool.workspace = true ethereum-types.workspace = true async-trait.workspace = true @@ -36,7 +37,6 @@ rocksdb = { workspace = true, optional = true } prometheus = "0.14.0" itertools = "0.14.0" -pool_test.workspace = true tokio-stream = "0.1.17" sha3 = "0.10.8" diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index 7a17997af49..f6defb5a8c8 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -39,7 +39,7 @@ use crate::{ snap::encodable_to_proof, sync::{AccountStorageRoots, BlockSyncState, block_is_stale, update_pivot}, utils::{ - SendMessageError, dump_accounts_to_file, dump_storages_to_file, + AccountsWithStorage, SendMessageError, dump_accounts_to_file, dump_storages_to_file, get_account_state_snapshot_file, get_account_storages_snapshot_file, }, 
}; @@ -1339,22 +1339,18 @@ impl PeerHandler { let mut accounts_done: HashMap> = HashMap::new(); // Maps storage root to vector of hashed addresses matching that root and // vector of hashed storage keys and storage values. - let mut current_account_storages: BTreeMap, Vec<(H256, U256)>)> = - BTreeMap::new(); + let mut current_account_storages: BTreeMap = BTreeMap::new(); debug!("Starting request_storage_ranges loop"); loop { if current_account_storages .values() - .map(|(accounts, storages)| 32 * accounts.len() + 32 * storages.len()) + .map(|accounts| 32 * accounts.accounts.len() + 32 * accounts.storages.len()) .sum::() > RANGE_FILE_CHUNK_SIZE { let current_account_storages = std::mem::take(&mut current_account_storages); - let snapshot = current_account_storages - .into_iter() - .map(|(_, (accounts, storages))| (accounts, storages)) - .collect::>(); + let snapshot = current_account_storages.into_values().collect::>(); if !std::fs::exists(account_storages_snapshots_dir) .map_err(|_| PeerHandlerError::NoStorageSnapshotsDir)? 
@@ -1452,7 +1448,7 @@ impl PeerHandler { for account in accounts_by_root_hash[remaining_start].1.iter() { if let Some((_, old_intervals)) = account_storage_roots .accounts_with_storage_root - .get(&account) + .get(account) { if !old_intervals.is_empty() { acc_hash = *account; @@ -1640,13 +1636,22 @@ impl PeerHandler { // We downloaded a big storage account current_account_storages .entry(*root_hash) - .or_insert_with(|| (accounts.clone(), Vec::new())) - .1 + .or_insert_with(|| AccountsWithStorage { + accounts: accounts.clone(), + storages: Vec::new(), + }) + .storages .extend(account_storages.remove(0)); } else { - for (i, storage) in account_storages.into_iter().enumerate() { + for (i, storages) in account_storages.into_iter().enumerate() { let (root_hash, accounts) = &accounts_by_root_hash[start_index + i]; - current_account_storages.insert(*root_hash, (accounts.clone(), storage)); + current_account_storages.insert( + *root_hash, + AccountsWithStorage { + accounts: accounts.clone(), + storages, + }, + ); } } } @@ -1701,10 +1706,7 @@ impl PeerHandler { } { - let snapshot = current_account_storages - .into_iter() - .map(|(_, (accounts, storages))| (accounts, storages)) - .collect::>(); + let snapshot = current_account_storages.into_values().collect::>(); if !std::fs::exists(account_storages_snapshots_dir) .map_err(|_| PeerHandlerError::NoStorageSnapshotsDir)? 
diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index d3cffa4eb9f..36abad2282e 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -25,7 +25,7 @@ use ethrex_rlp::{decode::RLPDecode, encode::RLPEncode, error::RLPDecodeError}; use ethrex_storage::{EngineType, STATE_TRIE_SEGMENTS, Store, error::StoreError}; use ethrex_trie::trie_sorted::TrieGenerationError; use ethrex_trie::{Trie, TrieError}; -use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator}; +use rayon::iter::{ParallelBridge, ParallelIterator}; #[cfg(not(feature = "rocksdb"))] use std::collections::hash_map::Entry; use std::collections::{BTreeMap, HashSet}; @@ -1514,9 +1514,13 @@ async fn insert_storages_into_db( maybe_big_account_storage_state_roots: &Arc>>, pivot_header: &BlockHeader, ) -> Result<(), SyncError> { + use rayon::iter::IntoParallelIterator; + for entry in std::fs::read_dir(account_storages_snapshots_dir) .map_err(|_| SyncError::AccountStoragesSnapshotsDirNotFound)? 
{ + use crate::utils::AccountsWithStorage; + let entry = entry.map_err(|err| { SyncError::SnapshotReadError(account_storages_snapshots_dir.into(), err) })?; @@ -1527,8 +1531,15 @@ async fn insert_storages_into_db( let snapshot_contents = std::fs::read(&snapshot_path) .map_err(|err| SyncError::SnapshotReadError(snapshot_path.clone(), err))?; - let account_storages_snapshot: Vec<(Vec, Vec<(H256, U256)>)> = + #[expect(clippy::type_complexity)] + let account_storages_snapshot: Vec = RLPDecode::decode(&snapshot_contents) + .map(|all_accounts: Vec<(Vec, Vec<(H256, U256)>)>| { + all_accounts + .into_iter() + .map(|(accounts, storages)| AccountsWithStorage { accounts, storages }) + .collect() + }) .map_err(|_| SyncError::SnapshotDecodeError(snapshot_path.clone()))?; let maybe_big_account_storage_state_roots_clone = @@ -1541,30 +1552,22 @@ async fn insert_storages_into_db( account_storages_snapshot .into_par_iter() - .flat_map(|(accounts, storages)| { - let storages: Arc<[_]> = storages.into(); - accounts + .flat_map(|account_storages| { + let storages: Arc<[_]> = account_storages.storages.into(); + account_storages + .accounts .into_par_iter() // FIXME: we probably want to make storages an Arc .map(move |account| (account, storages.clone())) }) .map(|(account, storages)| { - let start = Instant::now(); - let n_keys = storages.len(); - let changes = compute_storage_roots( + compute_storage_roots( maybe_big_account_storage_state_roots_clone.clone(), store.clone(), account, &storages, pivot_hash_moved, - ); - let duration = Instant::now() - start; - // debug!( - // duration = duration.as_micros(), - // keys = n_keys, - // "Computed Storage Root" - // ); - changes + ) }) .collect::, SyncError>>() }) @@ -1641,8 +1644,8 @@ async fn insert_storage_into_rocksdb( account_state_snapshots_dir: &Path, temp_db_dir: &Path, ) -> Result<(), SyncError> { + use ethrex_threadpool::ThreadPool; use ethrex_trie::trie_sorted::trie_from_sorted_accounts; - use pool_test::ThreadPool; use 
std::thread::scope; struct RocksDBIterator<'a> { @@ -1718,7 +1721,7 @@ async fn insert_storage_into_rocksdb( let (buffer_sender, buffer_receiver) = bounded::>(1001); for _ in 0..1_000 { - buffer_sender.send(Vec::with_capacity(20_065)); + let _ = buffer_sender.send(Vec::with_capacity(20_065)); } scope(|scope| { diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index 2ab11d9dde2..9b15fbaf529 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -8,7 +8,6 @@ use ethrex_common::utils::keccak; use ethrex_common::{H256, H512, U256, types::AccountState}; use ethrex_rlp::{encode::RLPEncode, error::RLPDecodeError}; use ethrex_trie::Node; -use itertools::{Itertools, iproduct}; use secp256k1::{PublicKey, SecretKey}; use spawned_concurrency::error::GenServerError; @@ -146,9 +145,17 @@ pub fn dump_accounts_to_file( } } +/// Struct representing the storage slots of certain accounts that share the same storage root +pub struct AccountsWithStorage { + /// Accounts with the same storage root + pub accounts: Vec, + /// All slots in the trie from the accounts + pub storages: Vec<(H256, U256)>, +} + pub fn dump_storages_to_file( path: &Path, - storages: Vec<(Vec, Vec<(H256, U256)>)>, + storages: Vec, ) -> Result<(), DumpError> { cfg_if::cfg_if! 
{ if #[cfg(feature = "rocksdb")] { @@ -156,9 +163,9 @@ pub fn dump_storages_to_file( path, storages .into_iter() - .flat_map(|(accounts, slots)| { - accounts.into_iter().map(|hash| { - slots.iter().map(move |(slot_hash, slot_value)| { + .flat_map(|accounts_with_slots| { + accounts_with_slots.accounts.into_iter().map(|hash| { + accounts_with_slots.storages.iter().map(move |(slot_hash, slot_value)| { let key = [hash.as_bytes(), slot_hash.as_bytes()].concat(); (key, slot_value.encode_to_vec()) }).collect::>() @@ -172,7 +179,7 @@ pub fn dump_storages_to_file( error: std::io::ErrorKind::Other, }) } else { - dump_to_file(path, storages.encode_to_vec()) + dump_to_file(path, storages.into_iter().map(|accounts_with_slots| (accounts_with_slots.accounts, accounts_with_slots.storages)).collect::>().encode_to_vec()) } } } diff --git a/crates/storage/trie_db/rocksdb.rs b/crates/storage/trie_db/rocksdb.rs index 91a3d00aa3d..15bbf11f811 100644 --- a/crates/storage/trie_db/rocksdb.rs +++ b/crates/storage/trie_db/rocksdb.rs @@ -81,10 +81,7 @@ impl TrieDB for RocksDBTrieDB { .map_err(|e| TrieError::DbError(anyhow::anyhow!("RocksDB batch write error: {}", e))) } - fn put_batch_no_alloc( - &self, - key_values: &std::vec::Vec, - ) -> Result<(), TrieError> { + fn put_batch_no_alloc(&self, key_values: &[ethrex_trie::Node]) -> Result<(), TrieError> { let cf = self.cf_handle()?; let mut batch = rocksdb::WriteBatch::default(); let mut buffer = Vec::with_capacity(300); diff --git a/pool_test/src/main.rs b/pool_test/src/main.rs deleted file mode 100644 index 30452c02639..00000000000 --- a/pool_test/src/main.rs +++ /dev/null @@ -1,29 +0,0 @@ -use std::{ - sync::Arc, - thread::{scope, sleep}, - time::Duration, -}; - -use crate::myscope::ThreadPool; - -pub mod myscope; - -fn main() { - println!("Start"); - scope(|s| { - let pool = ThreadPool::new(1, s); - let pool_arc = Arc::new(pool); - let pool_arc_2 = pool_arc.clone(); - pool_arc.execute(Box::new(move || { - sleep(Duration::from_secs(1)); - 
println!("Inside, Inside, world!"); - pool_arc_2.execute(Box::new(move || { - println!("3, world!"); - })); - })); - pool_arc.execute_priority(Box::new(move || { - println!("Inside, world!"); - })); - }); - println!("Hello, world!"); -} From 303f9e06adc1c67498fa2385ef45d7ab30fe1ca8 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Mon, 29 Sep 2025 15:40:58 -0300 Subject: [PATCH 070/115] Compiles --- Cargo.lock | 21 --------------------- crates/storage/trie_db/rocksdb.rs | 2 +- 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6a8cec6931c..bd3901ba537 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9448,27 +9448,6 @@ dependencies = [ "bytecheck", ] -[[package]] -name = "reorgs" -version = "0.1.0" -dependencies = [ - "ethrex", - "ethrex-blockchain", - "ethrex-common", - "ethrex-config", - "ethrex-l2-common", - "ethrex-l2-rpc", - "ethrex-rpc", - "hex", - "nix", - "rand 0.8.5", - "secp256k1", - "sha2", - "tokio", - "tokio-util", - "tracing", -] - [[package]] name = "reqwest" version = "0.11.27" diff --git a/crates/storage/trie_db/rocksdb.rs b/crates/storage/trie_db/rocksdb.rs index a895c4175f3..4e6864b23ca 100644 --- a/crates/storage/trie_db/rocksdb.rs +++ b/crates/storage/trie_db/rocksdb.rs @@ -83,7 +83,7 @@ impl TrieDB for RocksDBTrieDB { fn put_batch_no_alloc(&self, key_values: &[ethrex_trie::Node]) -> Result<(), TrieError> { let cf = self.cf_handle()?; - let mut batch = rocksdb::WriteBatch::default(); + let mut batch = rocksdb::WriteBatchWithTransaction::default(); let mut buffer = Vec::with_capacity(300); for node in key_values { From adaf0b071fdd7c82b2d9596f33bbf797f9861a27 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Mon, 29 Sep 2025 15:49:18 -0300 Subject: [PATCH 071/115] fixed tomls --- Cargo.lock | 23 +++++++++++++++++++++-- Cargo.toml | 1 + crates/common/trie/Cargo.toml | 1 - crates/networking/p2p/Cargo.toml | 1 - 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/Cargo.lock 
b/Cargo.lock index bd3901ba537..c9e187b8052 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4334,7 +4334,6 @@ dependencies = [ "hex", "hex-literal", "hmac", - "itertools 0.14.0", "lazy_static", "prometheus 0.14.0", "rand 0.8.5", @@ -4611,7 +4610,6 @@ dependencies = [ "libmdbx", "proptest", "rand 0.8.5", - "rayon", "rocksdb", "serde", "serde_json", @@ -9448,6 +9446,27 @@ dependencies = [ "bytecheck", ] +[[package]] +name = "reorgs" +version = "0.1.0" +dependencies = [ + "ethrex", + "ethrex-blockchain", + "ethrex-common", + "ethrex-config", + "ethrex-l2-common", + "ethrex-l2-rpc", + "ethrex-rpc", + "hex", + "nix", + "rand 0.8.5", + "secp256k1", + "sha2", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "reqwest" version = "0.11.27" diff --git a/Cargo.toml b/Cargo.toml index f79dff8d45a..5fa58cba069 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ members = [ "tooling/archive_sync", "tooling/replayer", "crates/common/config", + "tooling/reorgs", "crates/concurrency", ] resolver = "2" diff --git a/crates/common/trie/Cargo.toml b/crates/common/trie/Cargo.toml index accf5a939ba..cccdcd81f77 100644 --- a/crates/common/trie/Cargo.toml +++ b/crates/common/trie/Cargo.toml @@ -23,7 +23,6 @@ rocksdb = { workspace = true, optional = true } smallvec = { version = "1.10.0", features = ["const_generics", "union"] } digest = "0.10.6" lazy_static.workspace = true -rayon.workspace = true crossbeam.workspace = true [features] diff --git a/crates/networking/p2p/Cargo.toml b/crates/networking/p2p/Cargo.toml index 1982fc73136..1c61ac16f3c 100644 --- a/crates/networking/p2p/Cargo.toml +++ b/crates/networking/p2p/Cargo.toml @@ -35,7 +35,6 @@ futures.workspace = true cfg-if.workspace = true rocksdb = { workspace = true, optional = true } prometheus = "0.14.0" -itertools = "0.14.0" tokio-stream = "0.1.17" sha3 = "0.10.8" From 1e0910aa32ae5a7543c5ac6818672b4962599bb1 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 11:17:32 -0300 Subject: 
[PATCH 072/115] Update code_collector.rs --- crates/networking/p2p/sync/code_collector.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/networking/p2p/sync/code_collector.rs b/crates/networking/p2p/sync/code_collector.rs index f5ccaf81889..0038bc8c935 100644 --- a/crates/networking/p2p/sync/code_collector.rs +++ b/crates/networking/p2p/sync/code_collector.rs @@ -39,8 +39,8 @@ impl CodeHashCollector { self.buffer.insert(hash); } - // Used depending on if the feature flag rocksdb is used - #[allow(dead_code)] + // The optimization for rocksdb database doesn't use this method + #[cfg(not(feature = "rocksdb"))] /// Extends the buffer with a list of code hashes pub fn extend(&mut self, hashes: impl IntoIterator) { self.buffer.extend(hashes); From ac7a8b9b557612422df9d83277e10f833eeb8733 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 11:21:52 -0300 Subject: [PATCH 073/115] Update rocksdb.rs --- crates/storage/trie_db/rocksdb.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/storage/trie_db/rocksdb.rs b/crates/storage/trie_db/rocksdb.rs index 4e6864b23ca..2e1a013b9e4 100644 --- a/crates/storage/trie_db/rocksdb.rs +++ b/crates/storage/trie_db/rocksdb.rs @@ -84,7 +84,8 @@ impl TrieDB for RocksDBTrieDB { fn put_batch_no_alloc(&self, key_values: &[ethrex_trie::Node]) -> Result<(), TrieError> { let cf = self.cf_handle()?; let mut batch = rocksdb::WriteBatchWithTransaction::default(); - let mut buffer = Vec::with_capacity(300); + // 532 is the maximum size of an encoded branch node. 
+ let mut buffer = Vec::with_capacity(532); for node in key_values { let db_key = self.make_key(node.compute_hash()); From 6d0e8d87307272d4497e89bce887d4e590a552b5 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 11:40:59 -0300 Subject: [PATCH 074/115] Removed unnecessary alloc in put_batch_no_alloc --- crates/common/trie/db.rs | 4 +-- crates/common/trie/trie_sorted.rs | 51 ++++++++++++++++--------------- crates/networking/p2p/sync.rs | 4 +-- crates/storage/trie_db/rocksdb.rs | 14 ++++----- 4 files changed, 37 insertions(+), 36 deletions(-) diff --git a/crates/common/trie/db.rs b/crates/common/trie/db.rs index 99964db9b9f..4508e216371 100644 --- a/crates/common/trie/db.rs +++ b/crates/common/trie/db.rs @@ -10,11 +10,11 @@ use std::{ pub trait TrieDB: Send + Sync { fn get(&self, key: NodeHash) -> Result>, TrieError>; fn put_batch(&self, key_values: Vec<(NodeHash, Vec)>) -> Result<(), TrieError>; - fn put_batch_no_alloc(&self, key_values: &[Node]) -> Result<(), TrieError> { + fn put_batch_no_alloc(&self, key_values: &[(NodeHash, Node)]) -> Result<(), TrieError> { self.put_batch( key_values .iter() - .map(|node| (node.compute_hash(), node.encode_to_vec())) + .map(|node| (node.0, node.1.encode_to_vec())) .collect(), ) } diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index 028bd33ed7a..b2774c7dbc4 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -1,5 +1,5 @@ use crate::{ - EMPTY_TRIE_HASH, Nibbles, Node, TrieDB, TrieError, + EMPTY_TRIE_HASH, Nibbles, Node, NodeHash, TrieDB, TrieError, node::{BranchNode, ExtensionNode, LeafNode}, }; use crossbeam::channel::{Receiver, Sender, bounded}; @@ -78,7 +78,7 @@ fn create_parent(center_side: &CenterSide, closest_nibbles: &Nibbles) -> StackEl } fn add_center_to_parent_and_write_queue( - nodes_to_write: &mut Vec, + nodes_to_write: &mut Vec<(NodeHash, Node)>, center_side: &CenterSide, parent_element: &mut StackElement, ) ->
Result<(), TrieGenerationError> { @@ -95,7 +95,7 @@ fn add_center_to_parent_and_write_queue( node.clone().into() } else { let hash = node.compute_hash(); - nodes_to_write.push(node.clone().into()); + nodes_to_write.push((hash, node.clone().into())); ExtensionNode { prefix: path, child: hash.into(), @@ -120,14 +120,14 @@ fn add_center_to_parent_and_write_queue( .filter_map(|(index, child)| child.is_valid().then_some(index)) .collect::>() ); - nodes_to_write.push(node); + nodes_to_write.push((node.compute_hash(), node)); Ok(()) } fn flush_nodes_to_write( - mut nodes_to_write: Vec, + mut nodes_to_write: Vec<(NodeHash, Node)>, db: &dyn TrieDB, - sender: Sender>, + sender: Sender>, ) -> Result<(), TrieGenerationError> { db.put_batch_no_alloc(&nodes_to_write) .map_err(TrieGenerationError::FlushToDbError)?; @@ -141,13 +141,13 @@ pub fn trie_from_sorted_accounts<'scope, T>( db: &'scope dyn TrieDB, data_iter: &mut T, scope: Arc>, - buffer_sender: Sender>, - buffer_receiver: Receiver>, + buffer_sender: Sender>, + buffer_receiver: Receiver>, ) -> Result where T: Iterator)> + Send, { - let mut nodes_to_write: Vec = buffer_receiver + let mut nodes_to_write: Vec<(NodeHash, Node)> = buffer_receiver .recv() .expect("This channel shouldn't close"); let mut trie_stack: Vec = Vec::with_capacity(64); // Optimized for H256 @@ -165,9 +165,9 @@ where partial: center_side.path, value: initial_value.1, }; - let hash = node.compute_hash().finalize(); - flush_nodes_to_write(vec![node.into()], db, buffer_sender)?; - return Ok(hash); + let hash = node.compute_hash(); + flush_nodes_to_write(vec![(hash, node.into())], db, buffer_sender)?; + return Ok(hash.finalize()); } while let Some(right_side) = right_side_opt { @@ -248,20 +248,20 @@ where .find(|(_, child)| child.is_valid()) .unwrap(); - debug_assert!(nodes_to_write.last().unwrap().compute_hash() == child.compute_hash()); - match nodes_to_write.iter_mut().last().unwrap() { + debug_assert!(nodes_to_write.last().unwrap().0 == 
child.compute_hash()); + let (_, node_hash_ref) = nodes_to_write.iter_mut().last().unwrap(); + match node_hash_ref { Node::Branch(_) => { - nodes_to_write.push( - ExtensionNode { - prefix: Nibbles::from_hex(vec![index as u8]), - child, - } - .into(), - ); + let node: Node = ExtensionNode { + prefix: Nibbles::from_hex(vec![index as u8]), + child, + } + .into(); + nodes_to_write.push((node.compute_hash(), node)); nodes_to_write .last() .expect("we just inserted") - .compute_hash() + .0 .finalize() } Node::Extension(extension_node) => { @@ -271,11 +271,12 @@ where Node::Leaf(leaf_node) => leaf_node.compute_hash().finalize(), } } else { - nodes_to_write.push(left_side.element.into()); + let node: Node = left_side.element.into(); + nodes_to_write.push((node.compute_hash(), node)); nodes_to_write .last() .expect("we just inserted") - .compute_hash() + .0 .finalize() }; @@ -290,7 +291,7 @@ pub fn trie_from_sorted_accounts_wrap( where T: Iterator)> + Send, { - let (buffer_sender, buffer_receiver) = bounded::>(1001); + let (buffer_sender, buffer_receiver) = bounded::>(1001); for _ in 0..1_000 { let _ = buffer_sender.send(Vec::with_capacity(20_065)); } diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 30cf6dadfb8..db686a552d8 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1645,7 +1645,7 @@ async fn insert_storage_into_rocksdb( temp_db_dir: &Path, ) -> Result<(), SyncError> { use ethrex_threadpool::ThreadPool; - use ethrex_trie::trie_sorted::trie_from_sorted_accounts; + use ethrex_trie::{NodeHash, trie_sorted::trie_from_sorted_accounts}; use std::thread::scope; struct RocksDBIterator<'a> { @@ -1719,7 +1719,7 @@ async fn insert_storage_into_rocksdb( .map(|num| num.into()) .unwrap_or(8); - let (buffer_sender, buffer_receiver) = bounded::>(1001); + let (buffer_sender, buffer_receiver) = bounded::>(1001); for _ in 0..1_000 { let _ = buffer_sender.send(Vec::with_capacity(20_065)); } diff --git 
a/crates/storage/trie_db/rocksdb.rs b/crates/storage/trie_db/rocksdb.rs index 2e1a013b9e4..70d1b29a07f 100644 --- a/crates/storage/trie_db/rocksdb.rs +++ b/crates/storage/trie_db/rocksdb.rs @@ -1,6 +1,6 @@ use ethrex_common::H256; use ethrex_rlp::encode::RLPEncode; -use ethrex_trie::{NodeHash, TrieDB, error::TrieError}; +use ethrex_trie::{Node, NodeHash, TrieDB, error::TrieError}; use rocksdb::{MultiThreaded, OptimisticTransactionDB}; use std::sync::Arc; @@ -41,7 +41,7 @@ impl RocksDBTrieDB { .ok_or_else(|| TrieError::DbError(anyhow::anyhow!("Column family not found"))) } - fn make_key(&self, node_hash: NodeHash) -> Vec { + fn make_key(&self, node_hash: &NodeHash) -> Vec { match &self.address_prefix { Some(address) => { // For storage tries, prefix with address @@ -60,7 +60,7 @@ impl RocksDBTrieDB { impl TrieDB for RocksDBTrieDB { fn get(&self, key: NodeHash) -> Result>, TrieError> { let cf = self.cf_handle()?; - let db_key = self.make_key(key); + let db_key = self.make_key(&key); self.db .get_cf(&cf, db_key) @@ -72,7 +72,7 @@ impl TrieDB for RocksDBTrieDB { let mut batch = rocksdb::WriteBatchWithTransaction::default(); for (key, value) in key_values { - let db_key = self.make_key(key); + let db_key = self.make_key(&key); batch.put_cf(&cf, db_key, value); } @@ -81,14 +81,14 @@ impl TrieDB for RocksDBTrieDB { .map_err(|e| TrieError::DbError(anyhow::anyhow!("RocksDB batch write error: {}", e))) } - fn put_batch_no_alloc(&self, key_values: &[ethrex_trie::Node]) -> Result<(), TrieError> { + fn put_batch_no_alloc(&self, key_values: &[(NodeHash, Node)]) -> Result<(), TrieError> { let cf = self.cf_handle()?; let mut batch = rocksdb::WriteBatchWithTransaction::default(); // 532 is the maximum size of an encoded branch node. 
let mut buffer = Vec::with_capacity(532); - for node in key_values { - let db_key = self.make_key(node.compute_hash()); + for (hash, node) in key_values { + let db_key = self.make_key(hash); buffer.clear(); node.encode(&mut buffer); batch.put_cf(&cf, db_key, &buffer); From ca3942ae6b358fa31e5dc3bc6c3ad122ccc9b685 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 11:59:24 -0300 Subject: [PATCH 075/115] Removed cfg_if --- Cargo.lock | 1 - crates/networking/p2p/Cargo.toml | 1 - crates/networking/p2p/sync.rs | 81 ++++++++++++++------------- crates/networking/p2p/utils.rs | 94 ++++++++++++++++++-------------- 4 files changed, 96 insertions(+), 81 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c9e187b8052..7a5a9654072 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4318,7 +4318,6 @@ dependencies = [ "aes", "async-trait", "bytes", - "cfg-if 1.0.3", "concat-kdf", "crossbeam 0.8.4", "ctr", diff --git a/crates/networking/p2p/Cargo.toml b/crates/networking/p2p/Cargo.toml index 1c61ac16f3c..9ffda225263 100644 --- a/crates/networking/p2p/Cargo.toml +++ b/crates/networking/p2p/Cargo.toml @@ -32,7 +32,6 @@ spawned-rt.workspace = true spawned-concurrency.workspace = true sha2.workspace = true futures.workspace = true -cfg-if.workspace = true rocksdb = { workspace = true, optional = true } prometheus = "0.14.0" diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index db686a552d8..83988dea410 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -886,25 +886,28 @@ impl Syncer { *METRICS.account_tries_insert_start_time.lock().await = Some(SystemTime::now()); // We read the account leafs from the files in account_state_snapshots_dir, write it into // the trie to compute the nodes and stores the accounts with storages for later use - cfg_if::cfg_if! 
{ - if #[cfg(feature = "rocksdb")] { - let computed_state_root = insert_accounts_into_rocksdb( - store.clone(), - &mut storage_accounts, - &account_state_snapshots_dir, - &crate::utils::get_rocksdb_temp_accounts_dir(&self.datadir), - &mut code_hash_collector, - ).await?; - let accounts_with_storage = std::collections::BTreeSet::from_iter(storage_accounts.accounts_with_storage_root.keys().copied()); - } else { - let computed_state_root = insert_accounts_into_db( - store.clone(), - &mut storage_accounts, - &account_state_snapshots_dir, - &mut code_hash_collector, - ).await?; - } - } + + #[cfg(feature = "rocksdb")] + let computed_state_root = insert_accounts_into_rocksdb( + store.clone(), + &mut storage_accounts, + &account_state_snapshots_dir, + &crate::utils::get_rocksdb_temp_accounts_dir(&self.datadir), + &mut code_hash_collector, + ) + .await?; + #[cfg(feature = "rocksdb")] + let accounts_with_storage = std::collections::BTreeSet::from_iter( + storage_accounts.accounts_with_storage_root.keys().copied(), + ); + #[cfg(not(feature = "rocksdb"))] + let computed_state_root = insert_accounts_into_db( + store.clone(), + &mut storage_accounts, + &account_state_snapshots_dir, + &mut code_hash_collector, + ) + .await?; info!( "Finished inserting account ranges, total storage accounts: {}", storage_accounts.accounts_with_storage_root.len() @@ -991,25 +994,27 @@ impl Syncer { "Inserting Storage Ranges - \x1b[31mWriting to DB\x1b[0m".to_string(); let account_storages_snapshots_dir = get_account_storages_snapshots_dir(&self.datadir); - cfg_if::cfg_if! 
{ - if #[cfg(feature = "rocksdb")] { - insert_storage_into_rocksdb( - store.clone(), - accounts_with_storage, - &account_storages_snapshots_dir, - &crate::utils::get_rocksdb_temp_storage_dir(&self.datadir) - ).await?; - } else { - let maybe_big_account_storage_state_roots: Arc>> = - Arc::new(Mutex::new(HashMap::new())); - insert_storages_into_db( - store.clone(), - &account_storages_snapshots_dir, - &maybe_big_account_storage_state_roots, - &pivot_header, - ) - .await?; - } + #[cfg(feature = "rocksdb")] + { + insert_storage_into_rocksdb( + store.clone(), + accounts_with_storage, + &account_storages_snapshots_dir, + &crate::utils::get_rocksdb_temp_storage_dir(&self.datadir), + ) + .await?; + } + #[cfg(not(feature = "rocksdb"))] + { + let maybe_big_account_storage_state_roots: Arc>> = + Arc::new(Mutex::new(HashMap::new())); + insert_storages_into_db( + store.clone(), + &account_storages_snapshots_dir, + &maybe_big_account_storage_state_roots, + &pivot_header, + ) + .await?; } *METRICS.storage_tries_insert_end_time.lock().await = Some(SystemTime::now()); diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index 9b5549ea072..500d5f40904 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -114,25 +114,22 @@ pub fn dump_accounts_to_file( path: &Path, accounts: Vec<(H256, AccountState)>, ) -> Result<(), DumpError> { - cfg_if::cfg_if! 
{ - if #[cfg(feature = "rocksdb")] { - dump_to_rocks_db( - path, - accounts - .into_iter() - .map(|(hash, state)| (hash.0.to_vec(), state.encode_to_vec()) - ).collect::>() - ) - .inspect_err(|err| error!("Rocksdb writing stt error {err:?}")) - .map_err(|_| DumpError { - path: path.to_path_buf(), - contents: Vec::new(), - error: std::io::ErrorKind::Other, - }) - } else { - dump_to_file(path, accounts.encode_to_vec()) - } - } + #[cfg(feature = "rocksdb")] + return dump_to_rocks_db( + path, + accounts + .into_iter() + .map(|(hash, state)| (hash.0.to_vec(), state.encode_to_vec())) + .collect::>(), + ) + .inspect_err(|err| error!("Rocksdb writing stt error {err:?}")) + .map_err(|_| DumpError { + path: path.to_path_buf(), + contents: Vec::new(), + error: std::io::ErrorKind::Other, + }); + #[cfg(not(feature = "rocksdb"))] + dump_to_file(path, accounts.encode_to_vec()) } /// Struct representing the storage slots of certain accounts that share the same storage root @@ -147,31 +144,46 @@ pub fn dump_storages_to_file( path: &Path, storages: Vec, ) -> Result<(), DumpError> { - cfg_if::cfg_if! 
{ - if #[cfg(feature = "rocksdb")] { - dump_to_rocks_db( - path, - storages + #[cfg(feature = "rocksdb")] + return dump_to_rocks_db( + path, + storages + .into_iter() + .flat_map(|accounts_with_slots| { + accounts_with_slots + .accounts .into_iter() - .flat_map(|accounts_with_slots| { - accounts_with_slots.accounts.into_iter().map(|hash| { - accounts_with_slots.storages.iter().map(move |(slot_hash, slot_value)| { + .map(|hash| { + accounts_with_slots + .storages + .iter() + .map(move |(slot_hash, slot_value)| { let key = [hash.as_bytes(), slot_hash.as_bytes()].concat(); (key, slot_value.encode_to_vec()) - }).collect::>() - }).collect::>() - }).flatten().collect::>() - ) - .inspect_err(|err| error!("Rocksdb writing stt error {err:?}")) - .map_err(|_| DumpError { - path: path.to_path_buf(), - contents: Vec::new(), - error: std::io::ErrorKind::Other, - }) - } else { - dump_to_file(path, storages.into_iter().map(|accounts_with_slots| (accounts_with_slots.accounts, accounts_with_slots.storages)).collect::>().encode_to_vec()) - } - } + }) + .collect::>() + }) + .collect::>() + }) + .flatten() + .collect::>(), + ) + .inspect_err(|err| error!("Rocksdb writing stt error {err:?}")) + .map_err(|_| DumpError { + path: path.to_path_buf(), + contents: Vec::new(), + error: std::io::ErrorKind::Other, + }); + + #[cfg(not(feature = "rocksdb"))] + dump_to_file( + path, + storages + .into_iter() + .map(|accounts_with_slots| (accounts_with_slots.accounts, accounts_with_slots.storages)) + .collect::>() + .encode_to_vec(), + ) } /// TODO: make it more generic From ebcccd60e2ac5ef001e0a6921b645b798f14b2b2 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 12:10:07 -0300 Subject: [PATCH 076/115] Update db.rs --- crates/common/trie/db.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/common/trie/db.rs b/crates/common/trie/db.rs index 4508e216371..0de923571e8 100644 --- a/crates/common/trie/db.rs +++ b/crates/common/trie/db.rs @@ -10,6 +10,7 @@ use std::{ 
pub trait TrieDB: Send + Sync { fn get(&self, key: NodeHash) -> Result>, TrieError>; fn put_batch(&self, key_values: Vec<(NodeHash, Vec)>) -> Result<(), TrieError>; + // TODO: replace putbatch with this function. fn put_batch_no_alloc(&self, key_values: &[(NodeHash, Node)]) -> Result<(), TrieError> { self.put_batch( key_values From f47bcb336ce181e462e747ed24c0ca5a0e1b1ebd Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 12:14:10 -0300 Subject: [PATCH 077/115] Fixed buffer count --- crates/common/trie/trie_sorted.rs | 9 +++++---- crates/networking/p2p/sync.rs | 16 ++++++++-------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index b2774c7dbc4..ddbae4b2937 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -41,7 +41,8 @@ pub enum TrieGenerationError { ThreadJoinError(), } -const SIZE_TO_WRITE_DB: u64 = 20_000; +pub const SIZE_TO_WRITE_DB: u64 = 20_000; +pub const BUFFER_COUNT: u64 = 100; impl CenterSide { fn from_value(tuple: (H256, Vec)) -> CenterSide { @@ -291,9 +292,9 @@ pub fn trie_from_sorted_accounts_wrap( where T: Iterator)> + Send, { - let (buffer_sender, buffer_receiver) = bounded::>(1001); - for _ in 0..1_000 { - let _ = buffer_sender.send(Vec::with_capacity(20_065)); + let (buffer_sender, buffer_receiver) = bounded::>(BUFFER_COUNT as usize); + for _ in 0..BUFFER_COUNT { + let _ = buffer_sender.send(Vec::with_capacity(SIZE_TO_WRITE_DB)); } scope(|s| { let pool = ThreadPool::new(12, s); diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 83988dea410..0789ea652cd 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1649,8 +1649,12 @@ async fn insert_storage_into_rocksdb( account_state_snapshots_dir: &Path, temp_db_dir: &Path, ) -> Result<(), SyncError> { + use crossbeam::channel::{bounded, unbounded}; use ethrex_threadpool::ThreadPool; - use 
ethrex_trie::{NodeHash, trie_sorted::trie_from_sorted_accounts}; + use ethrex_trie::{ + Node, NodeHash, + trie_sorted::{BUFFER_COUNT, SIZE_TO_WRITE_DB, trie_from_sorted_accounts}, + }; use std::thread::scope; struct RocksDBIterator<'a> { @@ -1714,19 +1718,15 @@ async fn insert_storage_into_rocksdb( }) .collect::>(); - use crossbeam::channel::{bounded, unbounded}; - - use ethrex_trie::Node; - let (sender, receiver) = unbounded::<()>(); let mut counter = 0; let thread_count = std::thread::available_parallelism() .map(|num| num.into()) .unwrap_or(8); - let (buffer_sender, buffer_receiver) = bounded::>(1001); - for _ in 0..1_000 { - let _ = buffer_sender.send(Vec::with_capacity(20_065)); + let (buffer_sender, buffer_receiver) = bounded::>(BUFFER_COUNT as usize); + for _ in 0..BUFFER_COUNT { + let _ = buffer_sender.send(Vec::with_capacity(SIZE_TO_WRITE_DB)); } scope(|scope| { From 60b9372de8788b03d6318773967df63711d98e9a Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 12:15:50 -0300 Subject: [PATCH 078/115] Update trie_sorted.rs --- crates/common/trie/trie_sorted.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index ddbae4b2937..d207e29f7c0 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -294,7 +294,7 @@ where { let (buffer_sender, buffer_receiver) = bounded::>(BUFFER_COUNT as usize); for _ in 0..BUFFER_COUNT { - let _ = buffer_sender.send(Vec::with_capacity(SIZE_TO_WRITE_DB)); + let _ = buffer_sender.send(Vec::with_capacity(SIZE_TO_WRITE_DB as usize)); } scope(|s| { let pool = ThreadPool::new(12, s); From 2c79fe071609e18da88d0cbc2139204a24a24e61 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 12:29:59 -0300 Subject: [PATCH 079/115] Update sync.rs --- crates/networking/p2p/sync.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/networking/p2p/sync.rs 
b/crates/networking/p2p/sync.rs index 0789ea652cd..a2c3aee7e78 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1726,7 +1726,7 @@ async fn insert_storage_into_rocksdb( let (buffer_sender, buffer_receiver) = bounded::>(BUFFER_COUNT as usize); for _ in 0..BUFFER_COUNT { - let _ = buffer_sender.send(Vec::with_capacity(SIZE_TO_WRITE_DB)); + let _ = buffer_sender.send(Vec::with_capacity(SIZE_TO_WRITE_DB as usize)); } scope(|scope| { From 7a14f4240500ee9aa7c7f2498e9d2ca2de04b8e8 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 12:46:34 -0300 Subject: [PATCH 080/115] Optimized insertion to disk --- crates/networking/p2p/utils.rs | 68 +++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index 500d5f40904..67c9d1a79ff 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -73,21 +73,48 @@ pub fn get_account_storages_snapshot_file(directory: &Path, chunk_index: u64) -> } #[cfg(feature = "rocksdb")] -pub fn dump_to_rocks_db( +pub fn dump_accounts_to_rocks_db( path: &Path, - mut contents: Vec<(Vec, Vec)>, + mut contents: Vec<(H256, AccountState)>, ) -> Result<(), rocksdb::Error> { - contents.sort(); + contents.sort_by_key(|(k, _)| *k); contents.dedup_by_key(|(k, _)| { - let mut buf = [0u8; 64]; - buf[..k.len()].copy_from_slice(k); + let mut buf = [0u8; 32]; + buf[..32].copy_from_slice(&k.0); buf }); + let mut buffer: Vec = Vec::new(); + let writer_options = rocksdb::Options::default(); + let mut writer = rocksdb::SstFileWriter::create(&writer_options); + writer.open(std::path::Path::new(&path))?; + for (key, acccount) in contents { + buffer.clear(); + acccount.encode(&mut buffer); + writer.put(key.0.as_ref(), buffer.as_slice())?; + } + writer.finish() +} + +#[cfg(feature = "rocksdb")] +pub fn dump_storages_to_rocks_db( + path: &Path, + mut contents: Vec<(H256, H256, U256)>, +) -> 
Result<(), rocksdb::Error> { + contents.sort(); + contents.dedup_by_key(|(k0, k1, _)| { + let mut buffer = [0_u8; 64]; + buffer[0..32].copy_from_slice(&k0.0); + buffer[32..64].copy_from_slice(&k1.0); + buffer + }); let writer_options = rocksdb::Options::default(); let mut writer = rocksdb::SstFileWriter::create(&writer_options); + let mut buffer_key = [0_u8; 64]; writer.open(std::path::Path::new(&path))?; - for values in contents { - writer.put(values.0, values.1)?; + for (account, slot_hash, slot_value) in contents { + buffer_key[0..32].copy_from_slice(&account.0); + buffer_key[32..64].copy_from_slice(&slot_hash.0); + writer.put(buffer_key.as_ref(), slot_value.to_big_endian())?; } writer.finish() } @@ -115,19 +142,13 @@ pub fn dump_accounts_to_file( accounts: Vec<(H256, AccountState)>, ) -> Result<(), DumpError> { #[cfg(feature = "rocksdb")] - return dump_to_rocks_db( - path, - accounts - .into_iter() - .map(|(hash, state)| (hash.0.to_vec(), state.encode_to_vec())) - .collect::>(), - ) - .inspect_err(|err| error!("Rocksdb writing stt error {err:?}")) - .map_err(|_| DumpError { - path: path.to_path_buf(), - contents: Vec::new(), - error: std::io::ErrorKind::Other, - }); + return dump_accounts_to_rocks_db(path, accounts) + .inspect_err(|err| error!("Rocksdb writing stt error {err:?}")) + .map_err(|_| DumpError { + path: path.to_path_buf(), + contents: Vec::new(), + error: std::io::ErrorKind::Other, + }); #[cfg(not(feature = "rocksdb"))] dump_to_file(path, accounts.encode_to_vec()) } @@ -145,7 +166,7 @@ pub fn dump_storages_to_file( storages: Vec, ) -> Result<(), DumpError> { #[cfg(feature = "rocksdb")] - return dump_to_rocks_db( + return dump_storages_to_rocks_db( path, storages .into_iter() @@ -157,10 +178,7 @@ pub fn dump_storages_to_file( accounts_with_slots .storages .iter() - .map(move |(slot_hash, slot_value)| { - let key = [hash.as_bytes(), slot_hash.as_bytes()].concat(); - (key, slot_value.encode_to_vec()) - }) + .map(move |(slot_hash, slot_value)| (hash, 
*slot_hash, *slot_value)) .collect::>() }) .collect::>() From c02f069f688791d06c2835919519a4a7c88d443d Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 12:50:05 -0300 Subject: [PATCH 081/115] fixed potential bug --- crates/networking/p2p/utils.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index 67c9d1a79ff..52b5740cfa3 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -110,11 +110,13 @@ pub fn dump_storages_to_rocks_db( let writer_options = rocksdb::Options::default(); let mut writer = rocksdb::SstFileWriter::create(&writer_options); let mut buffer_key = [0_u8; 64]; + let mut buffer_storage: Vec = Vec::new(); writer.open(std::path::Path::new(&path))?; for (account, slot_hash, slot_value) in contents { buffer_key[0..32].copy_from_slice(&account.0); buffer_key[32..64].copy_from_slice(&slot_hash.0); - writer.put(buffer_key.as_ref(), slot_value.to_big_endian())?; + slot_value.encode(&mut buffer_storage); + writer.put(buffer_key.as_ref(), buffer_storage.as_slice())?; } writer.finish() } From 60f0bc69d5800b0d79a0ad964e31e030212d6b9f Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 14:16:17 -0300 Subject: [PATCH 082/115] added debug logging --- crates/networking/p2p/utils.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index 52b5740cfa3..4979f1c4529 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -100,25 +100,35 @@ pub fn dump_storages_to_rocks_db( path: &Path, mut contents: Vec<(H256, H256, U256)>, ) -> Result<(), rocksdb::Error> { + use tracing::info; + + info!("dump_storages_to_rocks_db before sort"); contents.sort(); + info!("dump_storages_to_rocks_db before dedup"); contents.dedup_by_key(|(k0, k1, _)| { let mut buffer = [0_u8; 64]; 
buffer[0..32].copy_from_slice(&k0.0); buffer[32..64].copy_from_slice(&k1.0); buffer }); + info!("dump_storages_to_rocks_db before create file"); let writer_options = rocksdb::Options::default(); let mut writer = rocksdb::SstFileWriter::create(&writer_options); let mut buffer_key = [0_u8; 64]; let mut buffer_storage: Vec = Vec::new(); + info!("dump_storages_to_rocks_db before open file"); writer.open(std::path::Path::new(&path))?; + info!("dump_storages_to_rocks_db before write file"); for (account, slot_hash, slot_value) in contents { buffer_key[0..32].copy_from_slice(&account.0); buffer_key[32..64].copy_from_slice(&slot_hash.0); slot_value.encode(&mut buffer_storage); writer.put(buffer_key.as_ref(), buffer_storage.as_slice())?; } - writer.finish() + info!("dump_storages_to_rocks_db before finish file"); + writer.finish(); + info!("dump_storages_to_rocks_db after finish file"); + Ok(()) } pub fn get_code_hashes_snapshots_dir(datadir: &Path) -> PathBuf { From 4528ac0987d76e9114117c80a8c9f5f140c72270 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 14:16:56 -0300 Subject: [PATCH 083/115] Update utils.rs --- crates/networking/p2p/utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index 4979f1c4529..7dff398adfc 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -126,7 +126,7 @@ pub fn dump_storages_to_rocks_db( writer.put(buffer_key.as_ref(), buffer_storage.as_slice())?; } info!("dump_storages_to_rocks_db before finish file"); - writer.finish(); + writer.finish()?; info!("dump_storages_to_rocks_db after finish file"); Ok(()) } From c8c00bc85f2e1a3fa58fc9c714ddfff4a6d00072 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 14:29:03 -0300 Subject: [PATCH 084/115] fix --- crates/networking/p2p/utils.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git 
a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index 7dff398adfc..e0afa8b7cfb 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -102,33 +102,26 @@ pub fn dump_storages_to_rocks_db( ) -> Result<(), rocksdb::Error> { use tracing::info; - info!("dump_storages_to_rocks_db before sort"); contents.sort(); - info!("dump_storages_to_rocks_db before dedup"); contents.dedup_by_key(|(k0, k1, _)| { let mut buffer = [0_u8; 64]; buffer[0..32].copy_from_slice(&k0.0); buffer[32..64].copy_from_slice(&k1.0); buffer }); - info!("dump_storages_to_rocks_db before create file"); let writer_options = rocksdb::Options::default(); let mut writer = rocksdb::SstFileWriter::create(&writer_options); let mut buffer_key = [0_u8; 64]; let mut buffer_storage: Vec = Vec::new(); - info!("dump_storages_to_rocks_db before open file"); writer.open(std::path::Path::new(&path))?; - info!("dump_storages_to_rocks_db before write file"); for (account, slot_hash, slot_value) in contents { buffer_key[0..32].copy_from_slice(&account.0); buffer_key[32..64].copy_from_slice(&slot_hash.0); + buffer_storage.clear(); slot_value.encode(&mut buffer_storage); writer.put(buffer_key.as_ref(), buffer_storage.as_slice())?; } - info!("dump_storages_to_rocks_db before finish file"); - writer.finish()?; - info!("dump_storages_to_rocks_db after finish file"); - Ok(()) + writer.finish() } pub fn get_code_hashes_snapshots_dir(datadir: &Path) -> PathBuf { From 939ac99e98a9e828deedc5dd6bc6dc831efd6ca3 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 14:54:37 -0300 Subject: [PATCH 085/115] Lint --- crates/networking/p2p/utils.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index e0afa8b7cfb..d3145974cc6 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -100,8 +100,6 @@ pub fn dump_storages_to_rocks_db( path: &Path, mut contents: Vec<(H256, H256, 
U256)>, ) -> Result<(), rocksdb::Error> { - use tracing::info; - contents.sort(); contents.dedup_by_key(|(k0, k1, _)| { let mut buffer = [0_u8; 64]; From dda32e4f5b91704e488e53fd3dfa48801b6d98f6 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 15:14:09 -0300 Subject: [PATCH 086/115] Cleaned up code --- crates/networking/p2p/sync.rs | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index a2c3aee7e78..6925b7e930b 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -888,18 +888,21 @@ impl Syncer { // the trie to compute the nodes and stores the accounts with storages for later use #[cfg(feature = "rocksdb")] - let computed_state_root = insert_accounts_into_rocksdb( - store.clone(), - &mut storage_accounts, - &account_state_snapshots_dir, - &crate::utils::get_rocksdb_temp_accounts_dir(&self.datadir), - &mut code_hash_collector, - ) - .await?; - #[cfg(feature = "rocksdb")] - let accounts_with_storage = std::collections::BTreeSet::from_iter( - storage_accounts.accounts_with_storage_root.keys().copied(), - ); + let (computed_state_root, accounts_with_storage) = { + ( + insert_accounts_into_rocksdb( + store.clone(), + &mut storage_accounts, + &account_state_snapshots_dir, + &crate::utils::get_rocksdb_temp_accounts_dir(&self.datadir), + &mut code_hash_collector, + ) + .await?, + std::collections::BTreeSet::from_iter( + storage_accounts.accounts_with_storage_root.keys().copied(), + ), + ) + }; #[cfg(not(feature = "rocksdb"))] let computed_state_root = insert_accounts_into_db( store.clone(), @@ -1006,12 +1009,9 @@ impl Syncer { } #[cfg(not(feature = "rocksdb"))] { - let maybe_big_account_storage_state_roots: Arc>> = - Arc::new(Mutex::new(HashMap::new())); insert_storages_into_db( store.clone(), &account_storages_snapshots_dir, - &maybe_big_account_storage_state_roots, &pivot_header, ) .await?; @@ -1516,10 
+1516,11 @@ async fn insert_accounts_into_db( async fn insert_storages_into_db( store: Store, account_storages_snapshots_dir: &Path, - maybe_big_account_storage_state_roots: &Arc>>, pivot_header: &BlockHeader, ) -> Result<(), SyncError> { use rayon::iter::IntoParallelIterator; + let maybe_big_account_storage_state_roots: Arc>> = + Arc::new(Mutex::new(HashMap::new())); for entry in std::fs::read_dir(account_storages_snapshots_dir) .map_err(|_| SyncError::AccountStoragesSnapshotsDirNotFound)? From fd55d4718dd9d15edaa282c25734aca40d4930ec Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 15:19:13 -0300 Subject: [PATCH 087/115] Update Cargo.lock --- crates/l2/tee/quote-gen/Cargo.lock | 71 +++++++++++++++++++----------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/crates/l2/tee/quote-gen/Cargo.lock b/crates/l2/tee/quote-gen/Cargo.lock index a3631f96bbd..c6ceafb55a3 100644 --- a/crates/l2/tee/quote-gen/Cargo.lock +++ b/crates/l2/tee/quote-gen/Cargo.lock @@ -1091,13 +1091,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69323bff1fb41c635347b8ead484a5ca6c3f11914d784170b158d8449ab07f8e" dependencies = [ "cfg-if 0.1.10", - "crossbeam-channel", + "crossbeam-channel 0.4.4", "crossbeam-deque 0.7.4", "crossbeam-epoch 0.8.2", - "crossbeam-queue", + "crossbeam-queue 0.2.3", "crossbeam-utils 0.7.2", ] +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel 0.5.15", + "crossbeam-deque 0.8.6", + "crossbeam-epoch 0.9.18", + "crossbeam-queue 0.3.12", + "crossbeam-utils 0.8.21", +] + [[package]] name = "crossbeam-channel" version = "0.4.4" @@ -1108,6 +1121,15 @@ dependencies = [ "maybe-uninit", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils 0.8.21", +] + [[package]] name = "crossbeam-deque" version = "0.7.4" @@ -1164,6 +1186,15 @@ dependencies = [ "maybe-uninit", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils 0.8.21", +] + [[package]] name = "crossbeam-utils" version = "0.7.2" @@ -2049,7 +2080,6 @@ dependencies = [ "ethrex-rlp", "ethrex-trie", "hex", - "keccak-hash", "kzg-rs", "lazy_static", "once_cell", @@ -2073,7 +2103,6 @@ dependencies = [ "ethrex-common", "ethrex-p2p", "hex", - "keccak-hash", "serde", "serde_json", ] @@ -2092,7 +2121,6 @@ dependencies = [ "ethrex-rpc", "hex", "jsonwebtoken 9.3.1", - "keccak-hash", "reqwest 0.12.23", "serde", "serde_json", @@ -2107,6 +2135,7 @@ name = "ethrex-l2" version = "0.1.0" dependencies = [ "aligned-sdk", + "axum", "bincode", "bytes", "cfg-if 1.0.3", @@ -2138,7 +2167,6 @@ dependencies = [ "guest_program", "hex", "jsonwebtoken 9.3.1", - "keccak-hash", "lazy_static", "rand 0.8.5", "ratatui", @@ -2171,7 +2199,6 @@ dependencies = [ "ethrex-storage", "ethrex-trie", "ethrex-vm", - "keccak-hash", "lambdaworks-crypto 0.11.0", "secp256k1", "serde", @@ -2195,7 +2222,6 @@ dependencies = [ "ethrex-storage", "ethrex-storage-rollup", "hex", - "keccak-hash", "reqwest 0.12.23", "rustc-hex", "secp256k1", @@ -2224,7 +2250,6 @@ dependencies = [ "ethrex-crypto", "ethrex-rlp", "k256", - "keccak-hash", "lambdaworks-math 0.11.0", "lazy_static", "malachite", @@ -2259,6 +2284,7 @@ dependencies = [ "async-trait", "bytes", "concat-kdf", + "crossbeam 0.8.4", "ctr", "ethereum-types 0.15.1", "ethrex-blockchain", @@ -2266,11 +2292,11 @@ dependencies = [ "ethrex-rlp", "ethrex-storage", 
"ethrex-storage-rollup", + "ethrex-threadpool", "ethrex-trie", "futures", "hex", "hmac", - "keccak-hash", "lazy_static", "prometheus", "rand 0.8.5", @@ -2322,7 +2348,6 @@ dependencies = [ "ethrex-vm", "hex", "jsonwebtoken 9.3.1", - "keccak-hash", "rand 0.8.5", "reqwest 0.12.23", "secp256k1", @@ -2354,7 +2379,6 @@ dependencies = [ "eyre", "hex", "itertools 0.13.0", - "keccak-hash", "lazy_static", "reqwest 0.12.23", "secp256k1", @@ -2411,15 +2435,24 @@ dependencies = [ "tracing", ] +[[package]] +name = "ethrex-threadpool" +version = "0.1.0" +dependencies = [ + "crossbeam 0.8.4", +] + [[package]] name = "ethrex-trie" version = "0.1.0" dependencies = [ "anyhow", "bytes", + "crossbeam 0.8.4", "digest", "ethereum-types 0.15.1", "ethrex-rlp", + "ethrex-threadpool", "hex", "lazy_static", "serde", @@ -2821,7 +2854,6 @@ dependencies = [ "ethrex-trie", "ethrex-vm", "hex", - "keccak-hash", "rkyv", "serde", "serde_json", @@ -3564,16 +3596,6 @@ dependencies = [ "cpufeatures", ] -[[package]] -name = "keccak-hash" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e1b8590eb6148af2ea2d75f38e7d29f5ca970d5a4df456b3ef19b8b415d0264" -dependencies = [ - "primitive-types 0.13.1", - "tiny-keccak", -] - [[package]] name = "kzg-rs" version = "0.2.7" @@ -4705,7 +4727,6 @@ dependencies = [ "ethrex-vm", "guest_program", "hex", - "keccak-hash", "secp256k1", "serde", "serde_json", @@ -5809,7 +5830,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76347472cc448d47dbf9f67541fde19dbb054793e8e0546ce8917bfb695e1b56" dependencies = [ - "crossbeam", + "crossbeam 0.7.3", "tokio", "tokio-stream", "tokio-util", From 558c5f0c674f2472ccbe583a29fd37ff394c58c9 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 16:28:58 -0300 Subject: [PATCH 088/115] simplified --- crates/networking/p2p/sync.rs | 93 +++++++++++++++-------------------- 1 file changed, 39 
insertions(+), 54 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 6925b7e930b..1c64f95a530 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -28,7 +28,7 @@ use ethrex_trie::{Trie, TrieError}; use rayon::iter::{ParallelBridge, ParallelIterator}; #[cfg(not(feature = "rocksdb"))] use std::collections::hash_map::Entry; -use std::collections::{BTreeMap, HashSet}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::path::{Path, PathBuf}; #[cfg(not(feature = "rocksdb"))] use std::sync::Mutex; @@ -887,27 +887,13 @@ impl Syncer { // We read the account leafs from the files in account_state_snapshots_dir, write it into // the trie to compute the nodes and stores the accounts with storages for later use - #[cfg(feature = "rocksdb")] - let (computed_state_root, accounts_with_storage) = { - ( - insert_accounts_into_rocksdb( - store.clone(), - &mut storage_accounts, - &account_state_snapshots_dir, - &crate::utils::get_rocksdb_temp_accounts_dir(&self.datadir), - &mut code_hash_collector, - ) - .await?, - std::collections::BTreeSet::from_iter( - storage_accounts.accounts_with_storage_root.keys().copied(), - ), - ) - }; - #[cfg(not(feature = "rocksdb"))] - let computed_state_root = insert_accounts_into_db( + // Variable `accounts_with_storage` unused if not in rocksdb + #[allow(unused_variables)] + let (computed_state_root, accounts_with_storage) = insert_accounts( store.clone(), &mut storage_accounts, &account_state_snapshots_dir, + &self.datadir, &mut code_hash_collector, ) .await?; @@ -997,25 +983,14 @@ impl Syncer { "Inserting Storage Ranges - \x1b[31mWriting to DB\x1b[0m".to_string(); let account_storages_snapshots_dir = get_account_storages_snapshots_dir(&self.datadir); - #[cfg(feature = "rocksdb")] - { - insert_storage_into_rocksdb( - store.clone(), - accounts_with_storage, - &account_storages_snapshots_dir, - &crate::utils::get_rocksdb_temp_storage_dir(&self.datadir), - ) - .await?; - } 
- #[cfg(not(feature = "rocksdb"))] - { - insert_storages_into_db( - store.clone(), - &account_storages_snapshots_dir, - &pivot_header, - ) - .await?; - } + insert_storages( + store.clone(), + accounts_with_storage, + &account_storages_snapshots_dir, + &self.datadir, + &pivot_header, + ) + .await?; *METRICS.storage_tries_insert_end_time.lock().await = Some(SystemTime::now()); @@ -1452,12 +1427,13 @@ pub async fn validate_bytecodes(store: Store, state_root: H256) -> bool { } #[cfg(not(feature = "rocksdb"))] -async fn insert_accounts_into_db( +async fn insert_accounts( store: Store, storage_accounts: &mut AccountStorageRoots, account_state_snapshots_dir: &Path, + _: &Path, code_hash_collector: &mut CodeHashCollector, -) -> Result { +) -> Result<(H256, BTreeSet), SyncError> { let mut computed_state_root = *EMPTY_TRIE_HASH; for entry in std::fs::read_dir(account_state_snapshots_dir) .map_err(|_| SyncError::AccountStateSnapshotsDirNotFound)? @@ -1509,13 +1485,15 @@ async fn insert_accounts_into_db( computed_state_root = current_state_root?; } info!("computed_state_root {computed_state_root}"); - Ok(computed_state_root) + Ok((computed_state_root, BTreeSet::new())) } #[cfg(not(feature = "rocksdb"))] -async fn insert_storages_into_db( +async fn insert_storages( store: Store, + _: BTreeSet, account_storages_snapshots_dir: &Path, + _: &Path, pivot_header: &BlockHeader, ) -> Result<(), SyncError> { use rayon::iter::IntoParallelIterator; @@ -1588,19 +1566,20 @@ async fn insert_storages_into_db( } #[cfg(feature = "rocksdb")] -async fn insert_accounts_into_rocksdb( +async fn insert_accounts( store: Store, storage_accounts: &mut AccountStorageRoots, account_state_snapshots_dir: &Path, - temp_db_dir: &Path, + datadir: &Path, code_hash_collector: &mut CodeHashCollector, -) -> Result { +) -> Result<(H256, BTreeSet), SyncError> { + use crate::utils::get_rocksdb_temp_accounts_dir; use ethrex_trie::trie_sorted::trie_from_sorted_accounts_wrap; let trie = 
store.open_state_trie(*EMPTY_TRIE_HASH)?; let mut db_options = rocksdb::Options::default(); db_options.create_if_missing(true); - let db = rocksdb::DB::open(&db_options, temp_db_dir) + let db = rocksdb::DB::open(&db_options, get_rocksdb_temp_accounts_dir(datadir)) .map_err(|_| SyncError::AccountTempDBDirNotFound)?; let file_paths: Vec = std::fs::read_dir(account_state_snapshots_dir) .map_err(|_| SyncError::AccountStateSnapshotsDirNotFound)? @@ -1622,7 +1601,7 @@ async fn insert_accounts_into_rocksdb( } let iter = db.full_iterator(rocksdb::IteratorMode::Start); - trie_from_sorted_accounts_wrap( + let compute_state_root = trie_from_sorted_accounts_wrap( trie.db(), &mut iter .map(|k| k.expect("We shouldn't have a rocksdb error here")) // TODO: remove unwrap @@ -1640,16 +1619,22 @@ async fn insert_accounts_into_rocksdb( }) .map(|(k, v)| (H256::from_slice(&k), v.to_vec())), ) - .map_err(SyncError::TrieGenerationError) + .map_err(SyncError::TrieGenerationError)?; + + let accounts_with_storage = + BTreeSet::from_iter(storage_accounts.accounts_with_storage_root.keys().copied()); + Ok((compute_state_root, accounts_with_storage)) } #[cfg(feature = "rocksdb")] -async fn insert_storage_into_rocksdb( +async fn insert_storages( store: Store, - accounts_with_storage: std::collections::BTreeSet, - account_state_snapshots_dir: &Path, - temp_db_dir: &Path, + accounts_with_storage: BTreeSet, + account_storages_snapshots_dir: &Path, + datadir: &Path, + _: &BlockHeader, ) -> Result<(), SyncError> { + use crate::utils::get_rocksdb_temp_storage_dir; use crossbeam::channel::{bounded, unbounded}; use ethrex_threadpool::ThreadPool; use ethrex_trie::{ @@ -1694,9 +1679,9 @@ async fn insert_storage_into_rocksdb( let mut db_options = rocksdb::Options::default(); db_options.create_if_missing(true); - let db = rocksdb::DB::open(&db_options, temp_db_dir) + let db = rocksdb::DB::open(&db_options, get_rocksdb_temp_storage_dir(datadir)) .map_err(|_| SyncError::StorageTempDBDirNotFound)?; - let 
file_paths: Vec = std::fs::read_dir(account_state_snapshots_dir) + let file_paths: Vec = std::fs::read_dir(account_storages_snapshots_dir) .map_err(|_| SyncError::AccountStoragesSnapshotsDirNotFound)? .collect::, _>>() .map_err(|_| SyncError::AccountStoragesSnapshotsDirNotFound)? From 3bf7b01a983d5d4f0d08e3de02196eab01aa77e3 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 17:19:35 -0300 Subject: [PATCH 089/115] Lowered memory usage further (hopefully) --- crates/common/trie/trie_sorted.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index d207e29f7c0..e6579bc5f50 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -42,7 +42,7 @@ pub enum TrieGenerationError { } pub const SIZE_TO_WRITE_DB: u64 = 20_000; -pub const BUFFER_COUNT: u64 = 100; +pub const BUFFER_COUNT: u64 = 32; impl CenterSide { fn from_value(tuple: (H256, Vec)) -> CenterSide { From a65967037993b1f99716eaceb3d3e537d9cd55c4 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 17:24:09 -0300 Subject: [PATCH 090/115] Update Cargo.lock --- .../src/guest_program/src/risc0/Cargo.lock | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/crates/l2/prover/src/guest_program/src/risc0/Cargo.lock b/crates/l2/prover/src/guest_program/src/risc0/Cargo.lock index 9c44ce4298d..d733921f4a5 100644 --- a/crates/l2/prover/src/guest_program/src/risc0/Cargo.lock +++ b/crates/l2/prover/src/guest_program/src/risc0/Cargo.lock @@ -795,6 +795,28 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = 
"crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -814,6 +836,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -1276,15 +1307,24 @@ dependencies = [ "tracing", ] +[[package]] +name = "ethrex-threadpool" +version = "0.1.0" +dependencies = [ + "crossbeam", +] + [[package]] name = "ethrex-trie" version = "0.1.0" dependencies = [ "anyhow", "bytes", + "crossbeam", "digest", "ethereum-types", "ethrex-rlp", + "ethrex-threadpool", "hex", "lazy_static", "serde", From b9c013852b19aec8a940f314852b9cf69b29286f Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Tue, 30 Sep 2025 17:27:26 -0300 Subject: [PATCH 091/115] Perf changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b85400bbc1..d453183f90a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ ## Perf +### 2025-09-30 + +- Downloading all slots of big accounts during the initial leaves download step of snap sync [#4689](https://github.com/lambdaclass/ethrex/pull/4689) +- Inserting intelligently accounts with the same state root and few (<= slots) [#4689](https://github.com/lambdaclass/ethrex/pull/4689) +- Improving the performance of state trie through an ordered insertion algorithm [#4689](https://github.com/lambdaclass/ethrex/pull/4689) + ### 2025-09-24 - Avoid dumping empty storage accounts to disk 
[#4590](https://github.com/lambdaclass/ethrex/pull/4590) From c07938469c4ae9e6040182a8e9f8f6ecddbb3122 Mon Sep 17 00:00:00 2001 From: Javier Chatruc Date: Wed, 1 Oct 2025 12:27:53 -0300 Subject: [PATCH 092/115] Remove comments and commented code --- crates/networking/p2p/peer_handler.rs | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index c50aa2c3aa5..b9b3e129660 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -1288,21 +1288,6 @@ impl PeerHandler { let chunk_size = 300; let chunk_count = (accounts_by_root_hash.len() / chunk_size) + 1; - // TODO: - // To download repeated tries only once, we can group by root_hash so - // we download one address and then store N times (for simpler insertion/healing). - // Take care of doing it inside this function to avoid confusion between - // pivots. - // At a later time we might try to also store only once and insert for all. - // That should help at least to do less `compute_storage_roots`, but skipping - // that might be problematic for healing. - // We can also sort by decreasing number of repetitions, so we download - // and settle the most common first. - // AFTER: try to reduce memory usage from account filtering. - // It currently takes about 68B per account with storages, with ~25M of them, - // meaning 1.7GB. Possibly several copies of this. - // THEN: review storage formats, maybe play with memory mapped data. 
- // list of tasks to be executed // Types are (start_index, end_index, starting_hash) // NOTE: end_index is NOT inclusive @@ -1319,11 +1304,6 @@ impl PeerHandler { }); } - // let all_account_hashes: Vec = account_storage_roots - // .accounts_with_storage_root - // .keys() - // .copied() - // .collect(); // channel to send the tasks to the peers let (task_sender, mut task_receiver) = tokio::sync::mpsc::channel::(1000); From a4978705e5786544573bf3be4a3cca8e4da5d807 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Rodr=C3=ADguez=20Chatruc?= <49622509+jrchatruc@users.noreply.github.com> Date: Wed, 1 Oct 2025 12:29:37 -0300 Subject: [PATCH 093/115] Update crates/networking/p2p/peer_handler.rs Co-authored-by: Mario Rugiero --- crates/networking/p2p/peer_handler.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index b9b3e129660..6b1cb48b8aa 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -1325,7 +1325,7 @@ impl PeerHandler { loop { if current_account_storages .values() - .map(|accounts| 32 * accounts.accounts.len() + 32 * accounts.storages.len()) + .map(|accounts| 32 * accounts.accounts.len() + 64 * accounts.storages.len()) .sum::() > RANGE_FILE_CHUNK_SIZE { From 31652d89683fd7e07f70ed4c4a79f47ec9c5b2d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Rodr=C3=ADguez=20Chatruc?= <49622509+jrchatruc@users.noreply.github.com> Date: Wed, 1 Oct 2025 12:30:33 -0300 Subject: [PATCH 094/115] Update CHANGELOG.md Co-authored-by: Mario Rugiero --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 236ad28bfde..bf019e2e058 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ ### 2025-09-30 - Downloading all slots of big accounts during the initial leaves download step of snap sync 
[#4689](https://github.com/lambdaclass/ethrex/pull/4689) -- Inserting intelligently accounts with the same state root and few (<= slots) [#4689](https://github.com/lambdaclass/ethrex/pull/4689) +- Downloading and inserting intelligently accounts with the same state root and few (<= slots) [#4689](https://github.com/lambdaclass/ethrex/pull/4689) - Improving the performance of state trie through an ordered insertion algorithm [#4689](https://github.com/lambdaclass/ethrex/pull/4689) ### 2025-09-29 From bbbb13730e9efb2541f21781512f01b26f0a2809 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Wed, 1 Oct 2025 16:08:32 -0300 Subject: [PATCH 095/115] initial chunk test --- crates/networking/p2p/peer_handler.rs | 131 ++++++++++++++++++-------- 1 file changed, 92 insertions(+), 39 deletions(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index 6b1cb48b8aa..608e306291e 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -66,6 +66,8 @@ pub const SNAP_LIMIT: usize = 128; // increasing them may be the cause of peers disconnection pub const MAX_BLOCK_BODIES_TO_REQUEST: usize = 128; +const STORAGE_ROOTS_PER_CHUNK: usize = 300; + /// An abstraction over the [Kademlia] containing logic to make requests to peers #[derive(Debug, Clone)] pub struct PeerHandler { @@ -1260,33 +1262,73 @@ impl PeerHandler { *METRICS.current_step.lock().await = "Requesting Storage Ranges".to_string(); debug!("Starting request_storage_ranges function"); // 1) split the range in chunks of same length - let mut accounts_by_root_hash: BTreeMap<_, Vec<_>> = BTreeMap::new(); - for (account, (maybe_root_hash, _)) in &account_storage_roots.accounts_with_storage_root { - match maybe_root_hash { - Some(root) => { - accounts_by_root_hash - .entry(*root) - .or_default() - .push(*account); - } + let account_root_pairs: Vec<(H256, Option)> = account_storage_roots + 
.accounts_with_storage_root + .iter() + .map(|(account, (maybe_root_hash, _))| (*account, *maybe_root_hash)) + .collect(); + let mut chunk_groups: BTreeMap> = BTreeMap::new(); + + for (account, maybe_root_hash) in account_root_pairs { + let root = match maybe_root_hash { + Some(root) => root, None => { - let root = store - .get_account_state_by_acc_hash(pivot_header.hash(), *account) + store + .get_account_state_by_acc_hash(pivot_header.hash(), account) .expect("Failed to get account in state trie") - .expect("Could not find account that should have been downloaded or healed") - .storage_root; - accounts_by_root_hash - .entry(root) - .or_default() - .push(*account); + .expect( + "Could not find account that should have been downloaded or healed", + ) + .storage_root } + }; + + chunk_groups.entry(root).or_default().push(account); + + if chunk_groups.len() >= STORAGE_ROOTS_PER_CHUNK { + let chunk_accounts = Vec::from_iter(chunk_groups.into_iter()); + self.process_storage_chunk( + chunk_accounts, + account_storage_roots, + account_storages_snapshots_dir, + &mut chunk_index, + pivot_header, + ) + .await?; + chunk_groups = BTreeMap::new(); } } - let mut accounts_by_root_hash = Vec::from_iter(accounts_by_root_hash); - // TODO: Turn this into a stable sort for binary search. 
+ + if !chunk_groups.is_empty() { + let chunk_accounts = Vec::from_iter(chunk_groups.into_iter()); + self.process_storage_chunk( + chunk_accounts, + account_storage_roots, + account_storages_snapshots_dir, + &mut chunk_index, + pivot_header, + ) + .await?; + } + + Ok(chunk_index) + } + + async fn process_storage_chunk( + &mut self, + mut accounts_by_root_hash: Vec<(H256, Vec)>, + account_storage_roots: &mut AccountStorageRoots, + account_storages_snapshots_dir: &Path, + chunk_index: &mut u64, + pivot_header: &mut BlockHeader, + ) -> Result<(), PeerHandlerError> { + if accounts_by_root_hash.is_empty() { + return Ok(()); + } + + // Maintain previous prioritization of busy roots accounts_by_root_hash.sort_unstable_by_key(|(_, accounts)| !accounts.len()); - let chunk_size = 300; - let chunk_count = (accounts_by_root_hash.len() / chunk_size) + 1; + let chunk_count = (accounts_by_root_hash.len() / STORAGE_ROOTS_PER_CHUNK) + 1; // list of tasks to be executed // Types are (start_index, end_index, starting_hash) @@ -1294,8 +1336,8 @@ impl PeerHandler { let mut tasks_queue_not_started = VecDeque::::new(); for i in 0..chunk_count { - let chunk_start = chunk_size * i; - let chunk_end = (chunk_start + chunk_size).min(accounts_by_root_hash.len()); + let chunk_start = STORAGE_ROOTS_PER_CHUNK * i; + let chunk_end = (chunk_start + STORAGE_ROOTS_PER_CHUNK).min(accounts_by_root_hash.len()); tasks_queue_not_started.push_back(StorageTask { start_index: chunk_start, end_index: chunk_end, @@ -1321,7 +1363,7 @@ impl PeerHandler { // vector of hashed storage keys and storage values. 
let mut current_account_storages: BTreeMap = BTreeMap::new(); - debug!("Starting request_storage_ranges loop"); + debug!("Starting request_storage_ranges chunk loop"); loop { if current_account_storages .values() @@ -1352,15 +1394,16 @@ impl PeerHandler { }) .map_err(PeerHandlerError::DumpError)?; } + let file_index = *chunk_index; disk_joinset.spawn(async move { let path = get_account_storages_snapshot_file( &account_storages_snapshots_dir_cloned, - chunk_index, + file_index, ); dump_storages_to_file(&path, snapshot) }); - chunk_index += 1; + *chunk_index += 1; } if let Ok(result) = task_receiver.try_recv() { @@ -1378,9 +1421,7 @@ impl PeerHandler { for (_, accounts) in accounts_by_root_hash[start_index..remaining_start].iter() { for account in accounts { - if !accounts_done.contains_key(account) { - accounts_done.insert(*account, vec![]); - } + accounts_done.entry(*account).or_insert_with(Vec::new); } } @@ -1411,7 +1452,11 @@ impl PeerHandler { let acc_hash = accounts_by_root_hash[remaining_start].1[0]; let (_, old_intervals) = account_storage_roots .accounts_with_storage_root - .get_mut(&acc_hash).ok_or(PeerHandlerError::UnrecoverableError("Tried to get the old download intervals for an account but did not find them".to_owned()))?; + .get_mut(&acc_hash) + .ok_or(PeerHandlerError::UnrecoverableError( + "Tried to get the old download intervals for an account but did not find them" + .to_owned(), + ))?; for (old_start, end) in old_intervals { if end == &hash_end { *old_start = hash_start; @@ -1433,8 +1478,6 @@ impl PeerHandler { if !old_intervals.is_empty() { acc_hash = *account; } - } else { - continue; } } if acc_hash.is_zero() { @@ -1443,7 +1486,10 @@ impl PeerHandler { let (_, old_intervals) = account_storage_roots .accounts_with_storage_root .get_mut(&acc_hash) - .ok_or(PeerHandlerError::UnrecoverableError("Tried to get the old download intervals for an account but did not find them".to_owned()))?; + .ok_or(PeerHandlerError::UnrecoverableError( + "Tried to get 
the old download intervals for an account but did not find them" + .to_owned(), + ))?; old_intervals.remove( old_intervals .iter() @@ -1455,7 +1501,7 @@ impl PeerHandler { ); if old_intervals.is_empty() { for account in accounts_by_root_hash[remaining_start].1.iter() { - accounts_done.insert(*account, vec![]); + accounts_done.entry(*account).or_insert_with(Vec::new); } } } @@ -1514,7 +1560,10 @@ impl PeerHandler { let (_, intervals) = account_storage_roots .accounts_with_storage_root .get_mut(&accounts_by_root_hash[remaining_start].1[0]) - .ok_or(PeerHandlerError::UnrecoverableError("Tried to get the old download intervals for an account but did not find them".to_owned()))?; + .ok_or(PeerHandlerError::UnrecoverableError( + "Tried to get the old download intervals for an account but did not find them" + .to_owned(), + ))?; for i in 0..chunk_count { let start_hash_u256 = start_hash_u256 + chunk_size * i; @@ -1550,7 +1599,10 @@ impl PeerHandler { let (_, intervals) = account_storage_roots .accounts_with_storage_root .get_mut(&accounts_by_root_hash[remaining_start].1[0]) - .ok_or(PeerHandlerError::UnrecoverableError("Trie to get the old download intervals for an account but did not find them".to_owned()))?; + .ok_or(PeerHandlerError::UnrecoverableError( + "Trie to get the old download intervals for an account but did not find them" + .to_owned(), + ))?; for i in 0..chunk_count { let start_hash_u256 = start_hash_u256 + chunk_size * i; @@ -1695,9 +1747,10 @@ impl PeerHandler { .map_err(|_| PeerHandlerError::CreateStorageSnapshotsDir)?; } let path = - get_account_storages_snapshot_file(account_storages_snapshots_dir, chunk_index); + get_account_storages_snapshot_file(account_storages_snapshots_dir, *chunk_index); dump_storages_to_file(&path, snapshot) - .map_err(|_| PeerHandlerError::WriteStorageSnapshotsDir(chunk_index))?; + .map_err(|_| PeerHandlerError::WriteStorageSnapshotsDir(*chunk_index))?; + *chunk_index += 1; } disk_joinset .join_all() @@ -1725,7 +1778,7 @@ impl 
PeerHandler { self.peer_table.free_peer(result.peer_id).await; } - Ok(chunk_index + 1) + Ok(()) } async fn request_storage_ranges_worker( From a1397ad02bd9b6ef4a2af125d8cd677ad9058435 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Wed, 1 Oct 2025 19:29:57 -0300 Subject: [PATCH 096/115] reduce trie sorted size to write and buffer count --- crates/common/trie/trie_sorted.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index e6579bc5f50..946ab5eee30 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -41,8 +41,8 @@ pub enum TrieGenerationError { ThreadJoinError(), } -pub const SIZE_TO_WRITE_DB: u64 = 20_000; -pub const BUFFER_COUNT: u64 = 32; +pub const SIZE_TO_WRITE_DB: u64 = 10_000; +pub const BUFFER_COUNT: u64 = 16; impl CenterSide { fn from_value(tuple: (H256, Vec)) -> CenterSide { From 7f441cfe5c3bd61b5874791bacb0f623a34a9142 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Wed, 1 Oct 2025 20:28:52 -0300 Subject: [PATCH 097/115] size_to_write and buffer_count changes didn't affect trie_sorted memory allocation --- crates/common/trie/trie_sorted.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index 946ab5eee30..e6579bc5f50 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -41,8 +41,8 @@ pub enum TrieGenerationError { ThreadJoinError(), } -pub const SIZE_TO_WRITE_DB: u64 = 10_000; -pub const BUFFER_COUNT: u64 = 16; +pub const SIZE_TO_WRITE_DB: u64 = 20_000; +pub const BUFFER_COUNT: u64 = 32; impl CenterSide { fn from_value(tuple: (H256, Vec)) -> CenterSide { From 57d9631906187f2dbfaf5db10218da028bdbd0e8 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Wed, 1 Oct 2025 20:55:24 -0300 Subject: [PATCH 098/115] handle a case where snapshots become empty after a stale pivot --- 
crates/networking/p2p/peer_handler.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index 608e306291e..8f9a059c19b 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -66,7 +66,7 @@ pub const SNAP_LIMIT: usize = 128; // increasing them may be the cause of peers disconnection pub const MAX_BLOCK_BODIES_TO_REQUEST: usize = 128; -const STORAGE_ROOTS_PER_CHUNK: usize = 300; +const STORAGE_ROOTS_PER_CHUNK: usize = 10_000; /// An abstraction over the [Kademlia] containing logic to make requests to peers #[derive(Debug, Clone)] @@ -1374,6 +1374,10 @@ impl PeerHandler { let current_account_storages = std::mem::take(&mut current_account_storages); let snapshot = current_account_storages.into_values().collect::>(); + if snapshot.is_empty() { + continue; + } + if !std::fs::exists(account_storages_snapshots_dir) .map_err(|_| PeerHandlerError::NoStorageSnapshotsDir)? 
{ @@ -1746,11 +1750,15 @@ impl PeerHandler { std::fs::create_dir_all(account_storages_snapshots_dir) .map_err(|_| PeerHandlerError::CreateStorageSnapshotsDir)?; } - let path = - get_account_storages_snapshot_file(account_storages_snapshots_dir, *chunk_index); - dump_storages_to_file(&path, snapshot) - .map_err(|_| PeerHandlerError::WriteStorageSnapshotsDir(*chunk_index))?; - *chunk_index += 1; + if snapshot.is_empty() { + warn!(chunk = *chunk_index, "Skipping empty storage snapshot"); + } else { + let path = + get_account_storages_snapshot_file(account_storages_snapshots_dir, *chunk_index); + dump_storages_to_file(&path, snapshot) + .map_err(|_| PeerHandlerError::WriteStorageSnapshotsDir(*chunk_index))?; + *chunk_index += 1; + } } disk_joinset .join_all() From ce53e50f6a3ad0d81ae8c4540b87eb6eb12e698d Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Wed, 1 Oct 2025 23:26:14 -0300 Subject: [PATCH 099/115] go back to previous peer-level parallelism --- crates/networking/p2p/peer_handler.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index 8f9a059c19b..728aa151252 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -67,6 +67,8 @@ pub const SNAP_LIMIT: usize = 128; pub const MAX_BLOCK_BODIES_TO_REQUEST: usize = 128; const STORAGE_ROOTS_PER_CHUNK: usize = 10_000; +// How many storage roots we include in a single task sent to a peer. 
+const STORAGE_ROOTS_PER_TASK: usize = 300; /// An abstraction over the [Kademlia] containing logic to make requests to peers #[derive(Debug, Clone)] @@ -1328,16 +1330,18 @@ impl PeerHandler { // Maintain previous prioritization of busy roots accounts_by_root_hash.sort_unstable_by_key(|(_, accounts)| !accounts.len()); - let chunk_count = (accounts_by_root_hash.len() / STORAGE_ROOTS_PER_CHUNK) + 1; + let total_roots = accounts_by_root_hash.len(); + let task_span = STORAGE_ROOTS_PER_TASK.min(STORAGE_ROOTS_PER_CHUNK); + let task_partition_count = (total_roots + task_span - 1) / task_span; // list of tasks to be executed // Types are (start_index, end_index, starting_hash) // NOTE: end_index is NOT inclusive let mut tasks_queue_not_started = VecDeque::::new(); - for i in 0..chunk_count { - let chunk_start = STORAGE_ROOTS_PER_CHUNK * i; - let chunk_end = (chunk_start + STORAGE_ROOTS_PER_CHUNK).min(accounts_by_root_hash.len()); + for i in 0..task_partition_count { + let chunk_start = task_span * i; + let chunk_end = ((i + 1) * task_span).min(total_roots); tasks_queue_not_started.push_back(StorageTask { start_index: chunk_start, end_index: chunk_end, From 3475ee5b432ed42e9d6300869b841c4f71499fb1 Mon Sep 17 00:00:00 2001 From: Javier Chatruc Date: Thu, 2 Oct 2025 11:25:09 -0300 Subject: [PATCH 100/115] Unconditionally heal big accounts --- crates/networking/p2p/peer_handler.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index 6b1cb48b8aa..a5da7f64481 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -1456,6 +1456,7 @@ impl PeerHandler { if old_intervals.is_empty() { for account in accounts_by_root_hash[remaining_start].1.iter() { accounts_done.insert(*account, vec![]); + account_storage_roots.healed_accounts.insert(*account); } } } From 30c3dc9e81c8f56baf767d4fc77f933526e072ba Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Thu, 2 Oct 
2025 16:30:28 -0300 Subject: [PATCH 101/115] Added some comments and cleanup to the high level chunking --- crates/networking/p2p/peer_handler.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index 728aa151252..580909f9ccb 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -1263,7 +1263,7 @@ impl PeerHandler { ) -> Result { *METRICS.current_step.lock().await = "Requesting Storage Ranges".to_string(); debug!("Starting request_storage_ranges function"); - // 1) split the range in chunks of same length + // 1) collect pairs of (account_hash, storage_root) let account_root_pairs: Vec<(H256, Option)> = account_storage_roots .accounts_with_storage_root .iter() @@ -1271,7 +1271,9 @@ impl PeerHandler { .collect(); let mut chunk_groups: BTreeMap> = BTreeMap::new(); + // 2) group accounts by storage root and process them in chunks of STORAGE_ROOTS_PER_CHUNK for (account, maybe_root_hash) in account_root_pairs { + // 2.1) Make sure we have the storage root for the account let root = match maybe_root_hash { Some(root) => root, None => { @@ -1287,6 +1289,7 @@ impl PeerHandler { chunk_groups.entry(root).or_default().push(account); + // 2.2) If we have enough roots, process them if chunk_groups.len() >= STORAGE_ROOTS_PER_CHUNK { let chunk_accounts = Vec::from_iter(chunk_groups.into_iter()); self.process_storage_chunk( @@ -1301,6 +1304,7 @@ impl PeerHandler { } } + // 2.3) Process remaining roots if any if !chunk_groups.is_empty() { let chunk_accounts = Vec::from_iter(chunk_groups.into_iter()); self.process_storage_chunk( From c87fbcf70a8b827acc8cd8f99a5cc38e24b2a1dd Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Thu, 2 Oct 2025 18:02:41 -0300 Subject: [PATCH 102/115] Added some comment and a bit of extra cleanup --- crates/networking/p2p/peer_handler.rs | 40 +++++++++------------------ 1 file changed, 13 insertions(+), 27 
deletions(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index 580909f9ccb..1d22e086a6c 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -1280,9 +1280,7 @@ impl PeerHandler { store .get_account_state_by_acc_hash(pivot_header.hash(), account) .expect("Failed to get account in state trie") - .expect( - "Could not find account that should have been downloaded or healed", - ) + .expect("Could not find account that should have been downloaded or healed") .storage_root } }; @@ -1336,12 +1334,12 @@ impl PeerHandler { accounts_by_root_hash.sort_unstable_by_key(|(_, accounts)| !accounts.len()); let total_roots = accounts_by_root_hash.len(); let task_span = STORAGE_ROOTS_PER_TASK.min(STORAGE_ROOTS_PER_CHUNK); + // how many fully-populated task_span slices fit in let task_partition_count = (total_roots + task_span - 1) / task_span; // list of tasks to be executed // Types are (start_index, end_index, starting_hash) // NOTE: end_index is NOT inclusive - let mut tasks_queue_not_started = VecDeque::::new(); for i in 0..task_partition_count { let chunk_start = task_span * i; @@ -1371,7 +1369,7 @@ impl PeerHandler { // vector of hashed storage keys and storage values. 
let mut current_account_storages: BTreeMap = BTreeMap::new(); - debug!("Starting request_storage_ranges chunk loop"); + debug!(chunk = chunk_index, "Starting request_storage_ranges loop"); loop { if current_account_storages .values() @@ -1383,6 +1381,7 @@ impl PeerHandler { let snapshot = current_account_storages.into_values().collect::>(); if snapshot.is_empty() { + // TODO: This happened while testing on pivot changes, we need to understand why continue; } @@ -1465,10 +1464,7 @@ impl PeerHandler { let (_, old_intervals) = account_storage_roots .accounts_with_storage_root .get_mut(&acc_hash) - .ok_or(PeerHandlerError::UnrecoverableError( - "Tried to get the old download intervals for an account but did not find them" - .to_owned(), - ))?; + .ok_or(PeerHandlerError::UnrecoverableError("Tried to get the old download intervals for an account but did not find them".to_owned()))?; for (old_start, end) in old_intervals { if end == &hash_end { *old_start = hash_start; @@ -1498,18 +1494,12 @@ impl PeerHandler { let (_, old_intervals) = account_storage_roots .accounts_with_storage_root .get_mut(&acc_hash) - .ok_or(PeerHandlerError::UnrecoverableError( - "Tried to get the old download intervals for an account but did not find them" - .to_owned(), - ))?; + .ok_or(PeerHandlerError::UnrecoverableError("Tried to get the old download intervals for an account but did not find them".to_owned()))?; old_intervals.remove( old_intervals .iter() .position(|(_old_start, end)| end == &hash_end) - .ok_or(PeerHandlerError::UnrecoverableError( - "Could not find an old interval that we were tracking" - .to_owned(), - ))?, + .ok_or(PeerHandlerError::UnrecoverableError("Could not find an old interval that we were tracking".to_owned()))?, ); if old_intervals.is_empty() { for account in accounts_by_root_hash[remaining_start].1.iter() { @@ -1572,10 +1562,7 @@ impl PeerHandler { let (_, intervals) = account_storage_roots .accounts_with_storage_root 
.get_mut(&accounts_by_root_hash[remaining_start].1[0]) - .ok_or(PeerHandlerError::UnrecoverableError( - "Tried to get the old download intervals for an account but did not find them" - .to_owned(), - ))?; + .ok_or(PeerHandlerError::UnrecoverableError("Tried to get the old download intervals for an account but did not find them".to_owned()))?; for i in 0..chunk_count { let start_hash_u256 = start_hash_u256 + chunk_size * i; @@ -1611,10 +1598,7 @@ impl PeerHandler { let (_, intervals) = account_storage_roots .accounts_with_storage_root .get_mut(&accounts_by_root_hash[remaining_start].1[0]) - .ok_or(PeerHandlerError::UnrecoverableError( - "Trie to get the old download intervals for an account but did not find them" - .to_owned(), - ))?; + .ok_or(PeerHandlerError::UnrecoverableError("Trie to get the old download intervals for an account but did not find them".to_owned()))?; for i in 0..chunk_count { let start_hash_u256 = start_hash_u256 + chunk_size * i; @@ -1761,8 +1745,10 @@ impl PeerHandler { if snapshot.is_empty() { warn!(chunk = *chunk_index, "Skipping empty storage snapshot"); } else { - let path = - get_account_storages_snapshot_file(account_storages_snapshots_dir, *chunk_index); + let path = get_account_storages_snapshot_file( + account_storages_snapshots_dir, + *chunk_index, + ); dump_storages_to_file(&path, snapshot) .map_err(|_| PeerHandlerError::WriteStorageSnapshotsDir(*chunk_index))?; *chunk_index += 1; From 1d541448c6db243e136a53250834546a3fffa208 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Thu, 2 Oct 2025 18:08:38 -0300 Subject: [PATCH 103/115] formatting --- crates/networking/p2p/peer_handler.rs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index 1d22e086a6c..3d9f9fde649 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -1464,7 +1464,10 @@ impl PeerHandler { let (_, 
old_intervals) = account_storage_roots .accounts_with_storage_root .get_mut(&acc_hash) - .ok_or(PeerHandlerError::UnrecoverableError("Tried to get the old download intervals for an account but did not find them".to_owned()))?; + .ok_or(PeerHandlerError::UnrecoverableError( + "Tried to get the old download intervals for an account but did not find them" + .to_owned(), + ))?; for (old_start, end) in old_intervals { if end == &hash_end { *old_start = hash_start; @@ -1494,12 +1497,18 @@ impl PeerHandler { let (_, old_intervals) = account_storage_roots .accounts_with_storage_root .get_mut(&acc_hash) - .ok_or(PeerHandlerError::UnrecoverableError("Tried to get the old download intervals for an account but did not find them".to_owned()))?; + .ok_or(PeerHandlerError::UnrecoverableError( + "Tried to get the old download intervals for an account but did not find them" + .to_owned(), + ))?; old_intervals.remove( old_intervals .iter() .position(|(_old_start, end)| end == &hash_end) - .ok_or(PeerHandlerError::UnrecoverableError("Could not find an old interval that we were tracking".to_owned()))?, + .ok_or(PeerHandlerError::UnrecoverableError( + "Could not find an old interval that we were tracking" + .to_owned(), + ))?, ); if old_intervals.is_empty() { for account in accounts_by_root_hash[remaining_start].1.iter() { @@ -1562,7 +1571,10 @@ impl PeerHandler { let (_, intervals) = account_storage_roots .accounts_with_storage_root .get_mut(&accounts_by_root_hash[remaining_start].1[0]) - .ok_or(PeerHandlerError::UnrecoverableError("Tried to get the old download intervals for an account but did not find them".to_owned()))?; + .ok_or(PeerHandlerError::UnrecoverableError( + "Tried to get the old download intervals for an account but did not find them" + .to_owned(), + ))?; for i in 0..chunk_count { let start_hash_u256 = start_hash_u256 + chunk_size * i; @@ -1598,7 +1610,10 @@ impl PeerHandler { let (_, intervals) = account_storage_roots .accounts_with_storage_root 
.get_mut(&accounts_by_root_hash[remaining_start].1[0]) - .ok_or(PeerHandlerError::UnrecoverableError("Trie to get the old download intervals for an account but did not find them".to_owned()))?; + .ok_or(PeerHandlerError::UnrecoverableError( + "Trie to get the old download intervals for an account but did not find them" + .to_owned(), + ))?; for i in 0..chunk_count { let start_hash_u256 = start_hash_u256 + chunk_size * i; From 42a297e7c61a603e66bf1125d838baf11c36218d Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Thu, 2 Oct 2025 18:18:55 -0300 Subject: [PATCH 104/115] Added one more comment --- crates/networking/p2p/peer_handler.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index 3d9f9fde649..b08beaf9be9 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -1758,6 +1758,7 @@ impl PeerHandler { .map_err(|_| PeerHandlerError::CreateStorageSnapshotsDir)?; } if snapshot.is_empty() { + // TODO: This happened while testing on pivot changes, we need to understand why warn!(chunk = *chunk_index, "Skipping empty storage snapshot"); } else { let path = get_account_storages_snapshot_file( From c0ceeff3c46f47d986ea9a14f76b84b56cd87931 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 2 Oct 2025 18:21:43 -0300 Subject: [PATCH 105/115] Update utils.rs --- crates/networking/p2p/utils.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/crates/networking/p2p/utils.rs b/crates/networking/p2p/utils.rs index d3145974cc6..e92438046db 100644 --- a/crates/networking/p2p/utils.rs +++ b/crates/networking/p2p/utils.rs @@ -77,6 +77,11 @@ pub fn dump_accounts_to_rocks_db( path: &Path, mut contents: Vec<(H256, AccountState)>, ) -> Result<(), rocksdb::Error> { + // This can happen sometimes during download, and the sst ingestion method + // fails with empty chunk files + if contents.is_empty() { + return Ok(()); + } 
contents.sort_by_key(|(k, _)| *k); contents.dedup_by_key(|(k, _)| { let mut buf = [0u8; 32]; @@ -100,6 +105,11 @@ pub fn dump_storages_to_rocks_db( path: &Path, mut contents: Vec<(H256, H256, U256)>, ) -> Result<(), rocksdb::Error> { + // This can happen sometimes during download, and the sst ingestion method + // fails with empty chunk files + if contents.is_empty() { + return Ok(()); + } contents.sort(); contents.dedup_by_key(|(k0, k1, _)| { let mut buffer = [0_u8; 64]; From 5580ea1cb0b22688d5bbc508e321fbe2adc6869e Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Fri, 3 Oct 2025 15:12:18 -0300 Subject: [PATCH 106/115] Update trie_sorted.rs --- crates/common/trie/trie_sorted.rs | 41 +++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index e6579bc5f50..d034236904f 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -313,10 +313,10 @@ mod test { use ethereum_types::U256; use ethrex_rlp::encode::RLPEncode; - use crate::Trie; + use crate::{InMemoryTrieDB, Trie}; use super::*; - use std::{collections::BTreeMap, str::FromStr}; + use std::{collections::BTreeMap, str::FromStr, sync::Mutex}; fn generate_input_1() -> BTreeMap> { let mut accounts: BTreeMap> = BTreeMap::new(); @@ -368,6 +368,23 @@ mod test { accounts } + fn generate_input_5() -> BTreeMap> { + let mut accounts: BTreeMap> = BTreeMap::new(); + for (string, value) in [ + ( + "290decd9548b62a8d60345a988386fc84ba6bc95484008f6362f93160ef3e563", + U256::from_str("1191240792495687806002885977912460542139236513636").unwrap(), + ), + ( + "295841a49a1089f4b560f91cfbb0133326654dcbb1041861fc5dde96c724a22f", + U256::from(480), + ), + ] { + accounts.insert(H256::from_str(string).unwrap(), value.encode_to_vec()); + } + accounts + } + fn generate_input_slots_1() -> BTreeMap { let mut slots: BTreeMap = BTreeMap::new(); for string in [ @@ -381,7 +398,8 @@ mod test { } 
pub fn run_test_account_state(accounts: BTreeMap>) { - let trie = Trie::stateless(); + let computed_data = Arc::new(Mutex::new(BTreeMap::new())); + let trie = Trie::new(Box::new(InMemoryTrieDB::new(computed_data.clone()))); let db = trie.db(); let tested_trie_hash: H256 = trie_from_sorted_accounts_wrap( db, @@ -392,13 +410,21 @@ mod test { ) .expect("Shouldn't have errors"); - let mut trie: Trie = Trie::empty_in_memory(); + let expected_data = Arc::new(Mutex::new(BTreeMap::new())); + let mut trie = Trie::new(Box::new(InMemoryTrieDB::new(expected_data.clone()))); for account in accounts.iter() { trie.insert(account.0.as_bytes().to_vec(), account.1.encode_to_vec()) .unwrap(); } - assert!(tested_trie_hash == trie.hash_no_commit()) + assert_eq!(tested_trie_hash, trie.hash().unwrap()); + + let computed_data = computed_data.lock().unwrap(); + let expected_data = expected_data.lock().unwrap(); + for (k, v) in computed_data.iter() { + assert!(expected_data.contains_key(k)); + assert_eq!(*v, expected_data[k]); + } } pub fn run_test_storage_slots(slots: BTreeMap) { @@ -444,6 +470,11 @@ mod test { run_test_account_state(generate_input_4()); } + #[test] + fn test_5() { + run_test_account_state(generate_input_5()); + } + #[test] fn test_slots_1() { run_test_storage_slots(generate_input_slots_1()); From 4bc928e9beadc50c1a4736efd1715c5adba01d7d Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Fri, 3 Oct 2025 15:25:15 -0300 Subject: [PATCH 107/115] Fixed failing test --- crates/common/trie/trie_sorted.rs | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index d034236904f..f666a6f5ded 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -160,17 +160,6 @@ where let mut center_side: CenterSide = CenterSide::from_value(initial_value.clone()); let mut right_side_opt: Option<(H256, Vec)> = data_iter.next(); - // Edge Case - if 
right_side_opt.is_none() { - let node = LeafNode { - partial: center_side.path, - value: initial_value.1, - }; - let hash = node.compute_hash(); - flush_nodes_to_write(vec![(hash, node.into())], db, buffer_sender)?; - return Ok(hash.finalize()); - } - while let Some(right_side) = right_side_opt { if nodes_to_write.len() as u64 > SIZE_TO_WRITE_DB { let buffer_sender = buffer_sender.clone(); @@ -250,7 +239,7 @@ where .unwrap(); debug_assert!(nodes_to_write.last().unwrap().0 == child.compute_hash()); - let (_, node_hash_ref) = nodes_to_write.iter_mut().last().unwrap(); + let (node_hash, node_hash_ref) = nodes_to_write.iter_mut().last().unwrap(); match node_hash_ref { Node::Branch(_) => { let node: Node = ExtensionNode { @@ -267,9 +256,14 @@ where } Node::Extension(extension_node) => { extension_node.prefix.data.insert(0, index as u8); - extension_node.compute_hash().finalize() + *node_hash = extension_node.compute_hash(); + node_hash.finalize() + } + Node::Leaf(leaf_node) => { + leaf_node.partial.data.insert(0, index as u8); + *node_hash = leaf_node.compute_hash(); + node_hash.finalize() } - Node::Leaf(leaf_node) => leaf_node.compute_hash().finalize(), } } else { let node: Node = left_side.element.into(); From f9fcccdb3c62f377df2a1781018ac73694ae5f28 Mon Sep 17 00:00:00 2001 From: Javier Chatruc Date: Fri, 3 Oct 2025 17:36:28 -0300 Subject: [PATCH 108/115] Only call request_storage_ranges twice as a temporary solution while logging the accounts on error --- crates/networking/p2p/sync.rs | 43 ++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 97c602e3781..db230eae573 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -916,6 +916,7 @@ impl Syncer { // is correct. 
To do so, we always heal the state trie before requesting storage rates let mut chunk_index = 0_u64; let mut state_leafs_healed = 0_u64; + let mut storage_range_request_attempts = 0; loop { while block_is_stale(&pivot_header) { pivot_header = update_pivot( @@ -947,17 +948,37 @@ impl Syncer { "Started request_storage_ranges with {} accounts with storage root unchanged", storage_accounts.accounts_with_storage_root.len() ); - chunk_index = self - .peers - .request_storage_ranges( - &mut storage_accounts, - account_storages_snapshots_dir.as_ref(), - chunk_index, - &mut pivot_header, - store.clone(), - ) - .await - .map_err(SyncError::PeerHandler)?; + // This variable is a temporary solution until we figure out why we are + // sometimes trying infinitely on request_storage_ranges. It's not a big deal + // since the next healing step will fix it, but it's a bug and it should be fixed. + storage_range_request_attempts += 1; + if storage_range_request_attempts < 3 { + chunk_index = self + .peers + .request_storage_ranges( + &mut storage_accounts, + account_storages_snapshots_dir.as_ref(), + chunk_index, + &mut pivot_header, + store.clone(), + ) + .await + .map_err(SyncError::PeerHandler)?; + } else { + for (acc_hash, (maybe_root, old_intervals)) in + storage_accounts.accounts_with_storage_root.iter() + { + storage_accounts.healed_accounts.insert(*acc_hash); + error!( + "We couldn't download these accounts on request_storage_ranges. Account hash: {:x?}, {:x?}. 
Number of intervals {}", + acc_hash, + maybe_root, + old_intervals.len() + ); + } + + storage_accounts.accounts_with_storage_root.clear(); + } free_peers_and_log_if_not_empty(&self.peers).await; info!( From 6656c32c8ea7758213c8832a26e482d82f344a32 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Thu, 2 Oct 2025 16:48:13 -0300 Subject: [PATCH 109/115] Update server_runner.py --- tooling/sync/server_runner.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tooling/sync/server_runner.py b/tooling/sync/server_runner.py index 702e597160d..5d8e1a93b08 100644 --- a/tooling/sync/server_runner.py +++ b/tooling/sync/server_runner.py @@ -125,7 +125,7 @@ def send_slack_message_failed(header: str, hostname: str, maybe_timeout_in_minut return -def send_slack_message_success(hostname: str, elapsed, network: str, log_file: str, branch: str): +def send_slack_message_success(hostname: str, elapsed, network: str, log_file: str, branch: str, debug_assert: bool): try: commit = get_git_commit() webhook_url = os.environ["SLACK_WEBHOOK_URL_SUCCESS"] @@ -143,7 +143,7 @@ def send_slack_message_success(hostname: str, elapsed, network: str, log_file: s "type": "section", "text": { "type": "mrkdwn", - "text": f'*Server:* `{hostname}`\n*Synced in:* {format_elapsed_time(elapsed)}\n*Logs in:* `{log_file}`\n*Branch:* `{branch}`\n*Commit:* `{commit if commit else "N/A"}`' + "text": f'*Server:* `{hostname}`\n*Synced in:* {format_elapsed_time(elapsed)}\n*Logs in:* `{log_file}`\n*Branch:* `{branch}`\n*Commit:* `{commit if commit else "N/A"}`\n*Validation:* `{"true" if debug_assert else "false"}`' } } ] @@ -188,7 +188,7 @@ def get_variables(args): def block_production_loop( - hostname, args, logs_file, elapsed, start_time, block_production_payload + hostname, args, logs_file, elapsed, start_time, block_production_payload, debug_assert ): current_block_number = 0 block_start_time = time.time() @@ -196,7 +196,7 @@ def block_production_loop( block_elapsed = 
time.time() - block_start_time if block_elapsed > 30 * 60: # 30 minutes print("✅ Node is fully synced!") - send_slack_message_success(hostname, elapsed, args.network, f"{logs_file}_{start_time}.log", args.branch) + send_slack_message_success(hostname, elapsed, args.network, f"{logs_file}_{start_time}.log", args.branch, debug_assert) with open("sync_logs.txt", "a") as f: f.write(f"LOGS_FILE={logs_file}_{start_time}.log SYNCED\n") return True @@ -222,7 +222,7 @@ def block_production_loop( def verification_loop( - logs_file, args, hostname, payload, block_production_payload, start_time + logs_file, args, hostname, payload, block_production_payload, start_time, debug_assert ): while True: try: @@ -243,6 +243,7 @@ def verification_loop( elapsed, start_time, block_production_payload, + debug_assert, ) return success time.sleep(CHECK_INTERVAL) @@ -251,7 +252,7 @@ def verification_loop( def execution_loop( - command, logs_file, args, hostname, payload, block_production_payload + command, logs_file, args, hostname, payload, block_production_payload, debug_assert ): while True: start_time = time.time() @@ -262,7 +263,7 @@ def execution_loop( print("No monitor flag set, exiting.") break success = verification_loop( - logs_file, args, hostname, payload, block_production_payload, start_time + logs_file, args, hostname, payload, block_production_payload, start_time, debug_assert ) if not success: break @@ -287,7 +288,7 @@ def main(): } try: execution_loop( - command, logs_file, args, hostname, payload, block_production_payload + command, logs_file, args, hostname, payload, block_production_payload, args.debug_assert ) except subprocess.CalledProcessError as e: print(f"An error occurred while running the make command: {e}", file=sys.stderr) From b50703dd266357427d4f97e782a997c58fcf55eb Mon Sep 17 00:00:00 2001 From: Javier Chatruc Date: Sun, 5 Oct 2025 11:15:22 -0300 Subject: [PATCH 110/115] Fix bug where we would get stuck trying to insert storage tries for accounts we hadn't 
downloaded their leaves --- crates/networking/p2p/sync.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index db230eae573..82f1c7ce3a5 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -890,7 +890,7 @@ impl Syncer { // Variable `accounts_with_storage` unused if not in rocksdb #[allow(unused_variables)] - let (computed_state_root, accounts_with_storage) = insert_accounts( + let (computed_state_root, mut accounts_with_storage) = insert_accounts( store.clone(), &mut storage_accounts, &account_state_snapshots_dir, @@ -948,9 +948,6 @@ impl Syncer { "Started request_storage_ranges with {} accounts with storage root unchanged", storage_accounts.accounts_with_storage_root.len() ); - // This variable is a temporary solution until we figure out why we are - // sometimes trying infinitely on request_storage_ranges. It's not a big deal - // since the next healing step will fix it, but it's a bug and it should be fixed. storage_range_request_attempts += 1; if storage_range_request_attempts < 3 { chunk_index = self @@ -968,8 +965,15 @@ impl Syncer { for (acc_hash, (maybe_root, old_intervals)) in storage_accounts.accounts_with_storage_root.iter() { + // When we fall into this case what happened is there are certain accounts for which + // the storage root went back to a previous value we already had, and thus could not download + // their storage leaves because we were using an old value for their storage root. + // The fallback is to ensure we + // 1. Do not try to insert its leaves, as we don't have them + // 2. Mark it for storage healing. storage_accounts.healed_accounts.insert(*acc_hash); - error!( + accounts_with_storage.remove(acc_hash); + warn!( "We couldn't download these accounts on request_storage_ranges. Account hash: {:x?}, {:x?}. 
Number of intervals {}", acc_hash, maybe_root, From 612a8d051d420637a7918c05f877e8702f5e7cb6 Mon Sep 17 00:00:00 2001 From: Javier Chatruc Date: Mon, 6 Oct 2025 10:46:58 -0300 Subject: [PATCH 111/115] Readd rocksdb feature for p2p --- cmd/ethrex/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/ethrex/Cargo.toml b/cmd/ethrex/Cargo.toml index 407bd2dab5c..bf33fdb179e 100644 --- a/cmd/ethrex/Cargo.toml +++ b/cmd/ethrex/Cargo.toml @@ -72,7 +72,7 @@ c-kzg = [ "ethrex-p2p/c-kzg", ] metrics = ["ethrex-blockchain/metrics", "ethrex-l2/metrics"] -rocksdb = ["ethrex-storage/rocksdb"] +rocksdb = ["ethrex-storage/rocksdb", "ethrex-p2p/rocksdb"] jemalloc = ["dep:tikv-jemallocator"] jemalloc_profiling = [ "jemalloc", From dce6c08eaab293cf5b12a88ef8b99e38bbe5b6d8 Mon Sep 17 00:00:00 2001 From: Javier Chatruc Date: Mon, 6 Oct 2025 12:44:48 -0300 Subject: [PATCH 112/115] Change log showing accounts sent for storage healing after request storage ranges to be debug! --- crates/networking/p2p/sync.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 57cb72d738b..1e87a47d19c 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1087,8 +1087,9 @@ impl Syncer { // 2. Mark it for storage healing. storage_accounts.healed_accounts.insert(*acc_hash); accounts_with_storage.remove(acc_hash); - warn!( - "We couldn't download these accounts on request_storage_ranges. Account hash: {:x?}, {:x?}. Number of intervals {}", + debug!( + "We couldn't download these accounts on request_storage_ranges. Falling back to storage healing for it. + Account hash: {:x?}, {:x?}. 
Number of intervals {}", acc_hash, maybe_root, old_intervals.len() From 5110a89b52e376bb1b4917f745618093080ec3e7 Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Mon, 6 Oct 2025 13:05:03 -0300 Subject: [PATCH 113/115] fixed empty accounts case --- crates/common/trie/trie_sorted.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/common/trie/trie_sorted.rs b/crates/common/trie/trie_sorted.rs index f666a6f5ded..0e0859596f7 100644 --- a/crates/common/trie/trie_sorted.rs +++ b/crates/common/trie/trie_sorted.rs @@ -148,15 +148,15 @@ pub fn trie_from_sorted_accounts<'scope, T>( where T: Iterator)> + Send, { + let Some(initial_value) = data_iter.next() else { + return Ok(*EMPTY_TRIE_HASH); + }; let mut nodes_to_write: Vec<(NodeHash, Node)> = buffer_receiver .recv() .expect("This channel shouldn't close"); let mut trie_stack: Vec = Vec::with_capacity(64); // Optimized for H256 let mut left_side = StackElement::default(); - let Some(initial_value) = data_iter.next() else { - return Ok(*EMPTY_TRIE_HASH); - }; let mut center_side: CenterSide = CenterSide::from_value(initial_value.clone()); let mut right_side_opt: Option<(H256, Vec)> = data_iter.next(); From 508022eb2a376f66d6aabf5b9b148be60273f3da Mon Sep 17 00:00:00 2001 From: Francisco Xavier Gauna Date: Mon, 6 Oct 2025 15:09:56 -0300 Subject: [PATCH 114/115] Removed unneeded remove + linting --- crates/networking/p2p/sync.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 094b1834044..08ff115e678 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -1002,7 +1002,7 @@ impl Syncer { // Variable `accounts_with_storage` unused if not in rocksdb #[allow(unused_variables)] - let (computed_state_root, mut accounts_with_storage) = insert_accounts( + let (computed_state_root, accounts_with_storage) = insert_accounts( store.clone(), &mut storage_accounts, 
&account_state_snapshots_dir, @@ -1080,11 +1080,8 @@ impl Syncer { // When we fall into this case what happened is there are certain accounts for which // the storage root went back to a previous value we already had, and thus could not download // their storage leaves because we were using an old value for their storage root. - // The fallback is to ensure we - // 1. Do not try to insert its leaves, as we don't have them - // 2. Mark it for storage healing. + // The fallback is to ensure we mark it for storage healing. storage_accounts.healed_accounts.insert(*acc_hash); - accounts_with_storage.remove(acc_hash); debug!( "We couldn't download these accounts on request_storage_ranges. Falling back to storage healing for it. Account hash: {:x?}, {:x?}. Number of intervals {}", From c2bacdd435619b1669e36dbdf0de8855ca077934 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Tue, 7 Oct 2025 16:36:54 -0300 Subject: [PATCH 115/115] remove guardrails for empty snapshots --- crates/networking/p2p/peer_handler.rs | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index c3d84790d7e..9fab8565552 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -1399,11 +1399,6 @@ impl PeerHandler { let current_account_storages = std::mem::take(&mut current_account_storages); let snapshot = current_account_storages.into_values().collect::>(); - if snapshot.is_empty() { - // TODO: This happened while testing on pivot changes, we need to understand why - continue; - } - if !std::fs::exists(account_storages_snapshots_dir) .map_err(|_| PeerHandlerError::NoStorageSnapshotsDir)? 
{ @@ -1777,18 +1772,14 @@ impl PeerHandler { std::fs::create_dir_all(account_storages_snapshots_dir) .map_err(|_| PeerHandlerError::CreateStorageSnapshotsDir)?; } - if snapshot.is_empty() { - // TODO: This happened while testing on pivot changes, we need to understand why - warn!(chunk = *chunk_index, "Skipping empty storage snapshot"); - } else { - let path = get_account_storages_snapshot_file( - account_storages_snapshots_dir, - *chunk_index, - ); - dump_storages_to_file(&path, snapshot) - .map_err(|_| PeerHandlerError::WriteStorageSnapshotsDir(*chunk_index))?; - *chunk_index += 1; - } + + let path = get_account_storages_snapshot_file( + account_storages_snapshots_dir, + *chunk_index, + ); + dump_storages_to_file(&path, snapshot) + .map_err(|_| PeerHandlerError::WriteStorageSnapshotsDir(*chunk_index))?; + *chunk_index += 1; } disk_joinset .join_all()