perf(interpreter): improve i256 instructions (#630)

DaniPopes · web-flow · commit 37b0192886c5 · 2023-08-27T11:03:50.000+02:00
* perf(interpreter): improve i256 instructions

* chore: remove unused code, address review

* perf: drop zero check after dividing
diff --git a/crates/interpreter/src/instructions/bitwise.rs b/crates/interpreter/src/instructions/bitwise.rs
@@ -1,4 +1,4 @@
-use super::i256::{i256_cmp, i256_sign, two_compl, Sign};
+use super::i256::{i256_cmp, i256_sign_compl, two_compl, Sign};
 use crate::{
     gas,
     primitives::SpecId::CONSTANTINOPLE,
@@ -31,21 +31,13 @@ pub fn gt(interpreter: &mut Interpreter, _host: &mut dyn Host) {
 pub fn slt(interpreter: &mut Interpreter, _host: &mut dyn Host) {
     gas!(interpreter, gas::VERYLOW);
     pop_top!(interpreter, op1, op2);
-    *op2 = if i256_cmp(op1, *op2) == Ordering::Less {
-        U256::from(1)
-    } else {
-        U256::ZERO
-    }
+    *op2 = U256::from(i256_cmp(&op1, op2) == Ordering::Less);
 }
 
 pub fn sgt(interpreter: &mut Interpreter, _host: &mut dyn Host) {
     gas!(interpreter, gas::VERYLOW);
     pop_top!(interpreter, op1, op2);
-    *op2 = if i256_cmp(op1, *op2) == Ordering::Greater {
-        U256::from(1)
-    } else {
-        U256::ZERO
-    };
+    *op2 = U256::from(i256_cmp(&op1, op2) == Ordering::Greater);
 }
 
 pub fn eq(interpreter: &mut Interpreter, _host: &mut dyn Host) {
@@ -125,26 +117,21 @@ pub fn sar<SPEC: Spec>(interpreter: &mut Interpreter, _host: &mut dyn Host) {
     gas!(interpreter, gas::VERYLOW);
     pop_top!(interpreter, op1, op2);
 
-    let value_sign = i256_sign::<true>(op2);
+    let value_sign = i256_sign_compl(op2);
 
     *op2 = if *op2 == U256::ZERO || op1 >= U256::from(256) {
         match value_sign {
             // value is 0 or >=1, pushing 0
             Sign::Plus | Sign::Zero => U256::ZERO,
             // value is <0, pushing -1
-            Sign::Minus => two_compl(U256::from(1)),
+            Sign::Minus => U256::MAX,
         }
     } else {
+        const ONE: U256 = U256::from_limbs([1, 0, 0, 0]);
         let shift = usize::try_from(op1).unwrap();
-
         match value_sign {
-            Sign::Plus | Sign::Zero => *op2 >> shift,
-            Sign::Minus => {
-                let shifted = ((op2.overflowing_sub(U256::from(1)).0) >> shift)
-                    .overflowing_add(U256::from(1))
-                    .0;
-                two_compl(shifted)
-            }
+            Sign::Plus | Sign::Zero => op2.wrapping_shr(shift),
+            Sign::Minus => two_compl(op2.wrapping_sub(ONE).wrapping_shr(shift).wrapping_add(ONE)),
         }
     };
 }
diff --git a/crates/interpreter/src/instructions/i256.rs b/crates/interpreter/src/instructions/i256.rs
@@ -1,28 +1,17 @@
 use crate::primitives::U256;
 use core::cmp::Ordering;
 
-#[cfg(test)]
-use proptest_derive::Arbitrary as PropTestArbitrary;
-
-#[cfg(any(test, feature = "arbitrary"))]
-use arbitrary::Arbitrary;
-
-#[cfg_attr(test, derive(PropTestArbitrary))]
-#[cfg_attr(any(test, feature = "arbitrary"), derive(Arbitrary))]
-#[derive(Copy, Clone, Eq, PartialEq, Debug)]
-pub enum Sign {
-    Plus,
-    Minus,
-    Zero,
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+#[repr(i8)]
+pub(super) enum Sign {
+    // same as `cmp::Ordering`
+    Minus = -1,
+    Zero = 0,
+    #[allow(dead_code)] // "constructed" with `mem::transmute` in `i256_sign` below
+    Plus = 1,
 }
 
-pub const _SIGN_BIT_MASK: U256 = U256::from_limbs([
-    0xFFFFFFFFFFFFFFFF,
-    0xFFFFFFFFFFFFFFFF,
-    0xFFFFFFFFFFFFFFFF,
-    0x7FFFFFFFFFFFFFFF,
-]);
-pub const MIN_NEGATIVE_VALUE: U256 = U256::from_limbs([
+const MIN_NEGATIVE_VALUE: U256 = U256::from_limbs([
     0x0000000000000000,
     0x0000000000000000,
     0x0000000000000000,
@@ -31,107 +20,99 @@ pub const MIN_NEGATIVE_VALUE: U256 = U256::from_limbs([
 
 const FLIPH_BITMASK_U64: u64 = 0x7FFFFFFFFFFFFFFF;
 
-#[cfg_attr(test, derive(PropTestArbitrary))]
-#[cfg_attr(any(test, feature = "arbitrary"), derive(Arbitrary))]
-#[derive(Copy, Clone, Eq, PartialEq, Debug)]
-pub struct I256(pub Sign, pub U256);
-
 #[inline(always)]
-pub fn i256_sign<const DO_TWO_COMPL: bool>(val: &mut U256) -> Sign {
-    if !val.bit(U256::BITS - 1) {
-        if *val == U256::ZERO {
-            Sign::Zero
-        } else {
-            Sign::Plus
-        }
-    } else {
-        if DO_TWO_COMPL {
-            two_compl_mut(val);
-        }
+pub(super) fn i256_sign(val: &U256) -> Sign {
+    if val.bit(U256::BITS - 1) {
         Sign::Minus
+    } else {
+        // SAFETY: false == 0 == Zero, true == 1 == Plus
+        unsafe { core::mem::transmute(*val != U256::ZERO) }
+    }
+}
+
+#[inline(always)]
+pub(super) fn i256_sign_compl(val: &mut U256) -> Sign {
+    let sign = i256_sign(val);
+    if sign == Sign::Minus {
+        two_compl_mut(val);
     }
+    sign
 }
 
 #[inline(always)]
 fn u256_remove_sign(val: &mut U256) {
+    // SAFETY: U256 does not have any padding bytes
     unsafe {
         val.as_limbs_mut()[3] &= FLIPH_BITMASK_U64;
     }
 }
 
 #[inline(always)]
-pub fn two_compl_mut(op: &mut U256) {
+pub(super) fn two_compl_mut(op: &mut U256) {
     *op = two_compl(*op);
 }
 
-pub fn two_compl(op: U256) -> U256 {
+#[inline(always)]
+pub(super) fn two_compl(op: U256) -> U256 {
     op.wrapping_neg()
 }
 
 #[inline(always)]
-pub fn i256_cmp(mut first: U256, mut second: U256) -> Ordering {
-    let first_sign = i256_sign::<false>(&mut first);
-    let second_sign = i256_sign::<false>(&mut second);
-    match (first_sign, second_sign) {
-        (Sign::Zero, Sign::Zero) => Ordering::Equal,
-        (Sign::Zero, Sign::Plus) => Ordering::Less,
-        (Sign::Zero, Sign::Minus) => Ordering::Greater,
-        (Sign::Minus, Sign::Zero) => Ordering::Less,
-        (Sign::Minus, Sign::Plus) => Ordering::Less,
-        (Sign::Minus, Sign::Minus) => first.cmp(&second),
-        (Sign::Plus, Sign::Minus) => Ordering::Greater,
-        (Sign::Plus, Sign::Zero) => Ordering::Greater,
-        (Sign::Plus, Sign::Plus) => first.cmp(&second),
+pub(super) fn i256_cmp(first: &U256, second: &U256) -> Ordering {
+    let first_sign = i256_sign(first);
+    let second_sign = i256_sign(second);
+    match first_sign.cmp(&second_sign) {
+        // note: adding `if first_sign != Sign::Zero` to short circuit zero comparisons performs
+        // slower on average, as of #582
+        Ordering::Equal => first.cmp(second),
+        o => o,
     }
 }
 
 #[inline(always)]
-pub fn i256_div(mut first: U256, mut second: U256) -> U256 {
-    let second_sign = i256_sign::<true>(&mut second);
+pub(super) fn i256_div(mut first: U256, mut second: U256) -> U256 {
+    let second_sign = i256_sign_compl(&mut second);
     if second_sign == Sign::Zero {
         return U256::ZERO;
     }
-    let first_sign = i256_sign::<true>(&mut first);
+
+    let first_sign = i256_sign_compl(&mut first);
     if first_sign == Sign::Minus && first == MIN_NEGATIVE_VALUE && second == U256::from(1) {
         return two_compl(MIN_NEGATIVE_VALUE);
     }
 
-    //let mut d = first / second;
-    let mut d = first.div_rem(second).0;
+    // necessary overflow checks are done above, perform the division
+    let mut d = first / second;
 
+    // set sign bit to zero
     u256_remove_sign(&mut d);
-    //set sign bit to zero
 
-    if d == U256::ZERO {
-        return U256::ZERO;
-    }
-
-    match (first_sign, second_sign) {
-        (Sign::Zero, Sign::Plus)
-        | (Sign::Plus, Sign::Zero)
-        | (Sign::Zero, Sign::Zero)
-        | (Sign::Plus, Sign::Plus)
-        | (Sign::Minus, Sign::Minus) => d,
-        (Sign::Zero, Sign::Minus)
-        | (Sign::Plus, Sign::Minus)
-        | (Sign::Minus, Sign::Zero)
-        | (Sign::Minus, Sign::Plus) => two_compl(d),
+    // two's complement only if the signs are different
+    // note: this condition has better codegen than an exhaustive match, as of #582
+    if (first_sign == Sign::Minus && second_sign != Sign::Minus)
+        || (second_sign == Sign::Minus && first_sign != Sign::Minus)
+    {
+        two_compl(d)
+    } else {
+        d
     }
 }
 
 #[inline(always)]
-pub fn i256_mod(mut first: U256, mut second: U256) -> U256 {
-    let first_sign = i256_sign::<true>(&mut first);
+pub(super) fn i256_mod(mut first: U256, mut second: U256) -> U256 {
+    let first_sign = i256_sign_compl(&mut first);
     if first_sign == Sign::Zero {
         return U256::ZERO;
     }
 
-    let _ = i256_sign::<true>(&mut second);
+    let _ = i256_sign_compl(&mut second);
+
+    // necessary overflow checks are done above, perform the operation
     let mut r = first % second;
+
+    // set sign bit to zero
     u256_remove_sign(&mut r);
-    if r == U256::ZERO {
-        return U256::ZERO;
-    }
+
     if first_sign == Sign::Minus {
         two_compl(r)
     } else {
@@ -171,9 +152,4 @@ mod tests {
         assert_eq!(i256_div(one_hundred, minus_one), neg_one_hundred);
         assert_eq!(i256_div(one_hundred, two), fifty);
     }
-
-    #[test]
-    fn arbitrary() {
-        proptest::proptest!(|(_value: I256)| { })
-    }
 }