Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 20 additions & 28 deletions crates/bytecode/src/legacy/analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,13 @@ use super::JumpTable;
use crate::opcode;
use bitvec::{bitvec, order::Lsb0, vec::BitVec};
use primitives::Bytes;
use std::{vec, vec::Vec};
use std::vec::Vec;

/// Analyze the bytecode to find the jumpdests. Used to create a jump table
/// that is needed for [`crate::LegacyAnalyzedBytecode`].
/// This function contains a hot loop and should be optimized as much as possible.
/// Analyzes the bytecode for use in [`LegacyAnalyzedBytecode`](crate::LegacyAnalyzedBytecode).
///
/// # Safety
/// See [`LegacyAnalyzedBytecode`](crate::LegacyAnalyzedBytecode) for more details.
///
/// The function uses unsafe pointer arithmetic, but maintains the following invariants:
/// - The iterator never advances beyond the end of the bytecode
/// - All pointer offsets are within bounds of the bytecode
/// - The jump table is never accessed beyond its allocated size
///
/// Undefined behavior if the bytecode does not end with a valid STOP opcode. Please check
/// [`crate::LegacyAnalyzedBytecode::new`] for details on how the bytecode is validated.
/// Prefer using [`LegacyAnalyzedBytecode::analyze`](crate::LegacyAnalyzedBytecode::analyze) instead.
pub fn analyze_legacy(bytecode: Bytes) -> (JumpTable, Bytes) {
if bytecode.is_empty() {
return (JumpTable::default(), Bytes::from_static(&[opcode::STOP]));
Expand All @@ -31,38 +23,38 @@ pub fn analyze_legacy(bytecode: Bytes) -> (JumpTable, Bytes) {

while iterator < end {
opcode = unsafe { *iterator };
if opcode::JUMPDEST == opcode {
if opcode == opcode::JUMPDEST {
// SAFETY: Jumps are max length of the code
unsafe { jumps.set_unchecked(iterator.offset_from(start) as usize, true) }
iterator = unsafe { iterator.offset(1) };
iterator = unsafe { iterator.add(1) };
} else {
let push_offset = opcode.wrapping_sub(opcode::PUSH1);
if push_offset < 32 {
// SAFETY: Iterator access range is checked in the while loop
iterator = unsafe { iterator.offset((push_offset + 2) as isize) };
iterator = unsafe { iterator.add(push_offset as usize + 2) };
} else {
// SAFETY: Iterator access range is checked in the while loop
iterator = unsafe { iterator.offset(1) };
iterator = unsafe { iterator.add(1) };
}
}
}

// Calculate padding needed to ensure bytecode ends with STOP
// If we're at the end and last opcode is not STOP, we need 1 more byte
let padding_size = (iterator as usize) - (end as usize) + (opcode != opcode::STOP) as usize;
if padding_size > 0 {
let mut padded_bytecode = Vec::with_capacity(bytecode.len() + padding_size);
padded_bytecode.extend_from_slice(&bytecode);
padded_bytecode.extend(vec![0; padding_size]);
(JumpTable::new(jumps), Bytes::from(padded_bytecode))
let padding = (iterator as usize) - (end as usize) + (opcode != opcode::STOP) as usize;
let bytecode = if padding > 0 {
let mut padded = Vec::with_capacity(bytecode.len() + padding);
padded.extend_from_slice(&bytecode);
padded.resize(padded.len() + padding, 0);
Bytes::from(padded)
} else {
(JumpTable::new(jumps), bytecode)
}
bytecode
};

(JumpTable::new(jumps), bytecode)
}

#[cfg(test)]
mod tests {
#[allow(unused_imports)]
use crate::{legacy::analyze_legacy, opcode};
use super::*;

#[test]
fn test_bytecode_ends_with_stop_no_padding_needed() {
Expand Down
61 changes: 33 additions & 28 deletions crates/bytecode/src/legacy/analyzed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ use primitives::Bytes;
///
/// # Bytecode Padding
///
/// All legacy bytecode is padded with 33 zero bytes at the end. This padding ensures that the
/// bytecode always ends with a valid STOP (0x00) opcode. The reason for padding with 33 bytes
/// (rather than one) is to handle the edge case where a PUSH32 opcode appears at the end of the
/// original bytecode without enough remaining bytes for its immediate data. The original bytecode
/// length is stored so that the original bytecode can be copied.
/// Legacy bytecode can be padded with up to 33 zero bytes at the end. This padding ensures that:
/// - the bytecode always ends with a valid STOP (0x00) opcode.
/// - there aren't incomplete immediates, meaning we can skip bounds checks in `PUSH*` instructions.
///
/// The non-padded length is stored in order to be able to copy the original bytecode.
///
/// # Gas safety
///
Expand All @@ -29,11 +29,11 @@ use primitives::Bytes;
#[derive(Clone, Debug, PartialEq, Eq, Hash, Ord, PartialOrd)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct LegacyAnalyzedBytecode {
/// The bytecode, potentially padded with up to 33 trailing zero bytes.
bytecode: Bytes,
/// The length of the original bytecode, before any padding was appended.
original_len: usize,
/// The jump table for the bytecode.
jump_table: JumpTable,
}

Expand All @@ -49,33 +49,38 @@ impl Default for LegacyAnalyzedBytecode {
}

impl LegacyAnalyzedBytecode {
/// Builds an analyzed bytecode from raw legacy bytes.
///
/// The length of the input is recorded first, then `analyze_legacy` produces the jump table
/// together with the (possibly padded) bytecode, and the three pieces are handed to
/// [`new`](Self::new).
///
/// See [`LegacyAnalyzedBytecode`] for more details.
pub fn analyze(bytecode: Bytes) -> Self {
    // Capture the length before the bytes are moved into the analysis.
    let unpadded_len = bytecode.len();
    let analysis = super::analysis::analyze_legacy(bytecode);
    let (jumps, padded) = analysis;
    Self::new(padded, unpadded_len, jumps)
}

/// Creates new analyzed bytecode.
///
/// Prefer instantiating using [`analyze`](Self::analyze) instead.
///
/// # Panics
///
/// * If `original_len` is greater than `bytecode.len()`
/// * If jump table length is less than `original_len`.
/// * If last bytecode byte is not `0x00` or if bytecode is empty.
pub fn new(bytecode: Bytes, original_len: usize, jump_table: JumpTable) -> Self {
if original_len > bytecode.len() {
panic!("original_len is greater than bytecode length");
}
if original_len > jump_table.len() {
panic!(
"jump table length {} is less than original length {}",
jump_table.len(),
original_len
);
}

if bytecode.is_empty() {
panic!("bytecode cannot be empty");
}

if bytecode.last() != Some(&opcode::STOP) {
panic!("last bytecode byte should be STOP (0x00)");
}

assert!(
original_len <= bytecode.len(),
"original_len is greater than bytecode length"
);
assert!(
original_len <= jump_table.len(),
"jump table length is less than original length"
);
assert!(!bytecode.is_empty(), "bytecode cannot be empty");
assert!(
bytecode.last() == Some(&opcode::STOP),
"last bytecode byte should be STOP (0x00)"
);
Self {
bytecode,
original_len,
Expand Down Expand Up @@ -137,7 +142,7 @@ mod tests {
}

#[test]
#[should_panic(expected = "jump table length 1 is less than original length 2")]
#[should_panic(expected = "jump table length is less than original length")]
fn test_panic_on_short_jump_table() {
let bytecode = Bytes::from_static(&[opcode::PUSH1, 0x01]);
let bytecode = LegacyRawBytecode(bytecode).into_analyzed();
Expand Down
14 changes: 5 additions & 9 deletions crates/bytecode/src/legacy/raw.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,18 @@
use super::{analyze_legacy, LegacyAnalyzedBytecode};
use super::LegacyAnalyzedBytecode;
use core::ops::Deref;
use primitives::Bytes;

/// Used only as an intermediate representation for legacy bytecode.
///
/// See [`LegacyAnalyzedBytecode`] for the main structure that is used in Revm.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Ord, PartialOrd)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct LegacyRawBytecode(pub Bytes);

impl LegacyRawBytecode {
    /// Analyzes the raw bytecode, consuming it and producing a [`LegacyAnalyzedBytecode`].
    ///
    /// This is a thin wrapper around [`LegacyAnalyzedBytecode::analyze`], which records the
    /// original length, computes the jump table and pads the bytecode where needed.
    pub fn into_analyzed(self) -> LegacyAnalyzedBytecode {
        // `self.0` is moved exactly once; the analysis itself lives in `analyze`.
        LegacyAnalyzedBytecode::analyze(self.0)
    }
}

Expand Down
Loading