Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
9622a79
Implement LLVM x86 vpclmulqdq intrinsics
TDecking Oct 22, 2024
05530b6
Adjust the vpclmulqdq test case
TDecking Oct 26, 2024
fb7bcd1
Merge pull request #3987 from TDecking/vpclmul
RalfJung Oct 27, 2024
cdc40a4
Add option for generating coverage reports
Mandragorian Oct 9, 2024
9023e35
Merge pull request #3954 from Mandragorian/coverage-report
RalfJung Oct 28, 2024
c8ce9e6
Android: Added syscall support
YohDeadfall Oct 25, 2024
f736269
Merge pull request #3992 from YohDeadfall/android-syscall
RalfJung Oct 28, 2024
6365ea1
contributing guide: mention expectations around force pushes and squa…
RalfJung Oct 27, 2024
d581d80
Merge pull request #3998 from RalfJung/contrib
oli-obk Oct 28, 2024
ff6e703
Preparing for merge from rustc
Oct 30, 2024
7d12e50
Merge from rustc
Oct 30, 2024
8a5f34a
Merge pull request #4001 from rust-lang/rustup-2024-10-30
RalfJung Oct 30, 2024
042f762
Change futex_wait errno from Scalar to IoError
noahmbright Oct 29, 2024
4ca9c07
Merge pull request #4000 from noahmbright/futex
RalfJung Oct 30, 2024
c5e86c4
Fixed a typo in the GetThreadDescription shim
YohDeadfall Oct 30, 2024
445a340
Merge pull request #4003 from YohDeadfall/windows-shim-typo
saethlin Oct 31, 2024
1c88af6
Preparing for merge from rustc
Oct 31, 2024
4828592
Merge from rustc
Oct 31, 2024
ef6b9a9
fmt
Oct 31, 2024
b8bd7be
silence clippy
RalfJung Oct 31, 2024
6630802
Merge pull request #4005 from rust-lang/rustup-2024-10-31
RalfJung Oct 31, 2024
06d869b
Preparing for merge from rustc
RalfJung Nov 2, 2024
3253cc6
Merge from rustc
RalfJung Nov 2, 2024
c1b8d66
teach clippy about IeeeFloat, and make all 'allow' into 'expect'
RalfJung Nov 2, 2024
c67ea44
Merge pull request #4009 from RalfJung/rustup
RalfJung Nov 2, 2024
9c75eff
Preparing for merge from rustc
Nov 3, 2024
b58cbe2
Merge from rustc
Nov 3, 2024
b17cf41
Merge pull request #4010 from rust-lang/rustup-2024-11-03
RalfJung Nov 3, 2024
dc62c34
Renamed ecx variales to this
YohDeadfall Nov 7, 2024
1254d8e
Get/set thread name shims return errors for invalid handles
YohDeadfall Oct 30, 2024
c8e089e
Merge pull request #4018 from YohDeadfall/ecx-name-standardization
RalfJung Nov 8, 2024
d7aceee
Merge pull request #4004 from YohDeadfall/thread-name-ice-fix
RalfJung Nov 8, 2024
beb8d6f
Preparing for merge from rustc
Nov 9, 2024
15d883e
Merge from rustc
Nov 9, 2024
c272bb4
Merge pull request #4019 from rust-lang/rustup-2024-11-09
saethlin Nov 9, 2024
fe39888
pthread-sync: avoid confusing error when running with preemption
RalfJung Nov 9, 2024
5ff90d0
Merge pull request #4020 from RalfJung/thread-sope
RalfJung Nov 9, 2024
ce7a560
Preparing for merge from rustc
RalfJung Nov 10, 2024
a01f37c
Merge from rustc
RalfJung Nov 10, 2024
a839fbf
Merge pull request #4021 from RalfJung/rustup
RalfJung Nov 10, 2024
d1a4812
store futexes in per-allocation data rather than globally
RalfJung Oct 12, 2024
673d9c3
Merge pull request #3971 from RalfJung/futex-virtual
RalfJung Nov 10, 2024
e8a3ffe
fix linux-futex test being accidentally disabled
RalfJung Nov 10, 2024
881f2ec
Merge pull request #4022 from RalfJung/linux-futex
RalfJung Nov 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 56 additions & 38 deletions src/tools/miri/src/shims/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,22 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
}
}

"pclmulqdq" => {
"pclmulqdq" | "pclmulqdq.256" | "pclmulqdq.512" => {
let mut len = 2; // in units of 64bits
this.expect_target_feature_for_intrinsic(link_name, "pclmulqdq")?;
if unprefixed_name.ends_with(".256") {
this.expect_target_feature_for_intrinsic(link_name, "vpclmulqdq")?;
len = 4;
} else if unprefixed_name.ends_with(".512") {
this.expect_target_feature_for_intrinsic(link_name, "vpclmulqdq")?;
this.expect_target_feature_for_intrinsic(link_name, "avx512f")?;
len = 8;
}

let [left, right, imm] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

pclmulqdq(this, left, right, imm, dest)?;
pclmulqdq(this, left, right, imm, dest, len)?;
}

name if name.starts_with("bmi.") => {
Expand Down Expand Up @@ -1134,61 +1145,68 @@ fn pmulhrsw<'tcx>(
/// Perform a carry-less multiplication of two 64-bit integers, selected from `left` and `right` according to `imm8`,
/// and store the results in `dst`.
///
/// `left` and `right` are both vectors of type 2 x i64. Only bits 0 and 4 of `imm8` matter;
/// `left` and `right` are both vectors of type `len` x i64. Only bits 0 and 4 of `imm8` matter;
/// they select the element of `left` and `right`, respectively.
///
/// `len` is the SIMD vector length (in counts of `i64` values). It is expected to be one of
/// `2`, `4`, or `8`.
///
/// <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128>
fn pclmulqdq<'tcx>(
this: &mut MiriInterpCx<'tcx>,
left: &OpTy<'tcx>,
right: &OpTy<'tcx>,
imm8: &OpTy<'tcx>,
dest: &MPlaceTy<'tcx>,
len: u64,
) -> InterpResult<'tcx, ()> {
assert_eq!(left.layout, right.layout);
assert_eq!(left.layout.size, dest.layout.size);
assert!([2u64, 4, 8].contains(&len));

// Transmute to `[u64; 2]`
// Transmute the input into arrays of `[u64; len]`.
// Transmute the output into an array of `[u128, len / 2]`.

let array_layout = this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u64, 2))?;
let left = left.transmute(array_layout, this)?;
let right = right.transmute(array_layout, this)?;
let dest = dest.transmute(array_layout, this)?;
let src_layout = this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u64, len))?;
let dest_layout = this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u128, len / 2))?;

let left = left.transmute(src_layout, this)?;
let right = right.transmute(src_layout, this)?;
let dest = dest.transmute(dest_layout, this)?;

let imm8 = this.read_scalar(imm8)?.to_u8()?;

// select the 64-bit integer from left that the user specified (low or high)
let index = if (imm8 & 0x01) == 0 { 0 } else { 1 };
let left = this.read_scalar(&this.project_index(&left, index)?)?.to_u64()?;

// select the 64-bit integer from right that the user specified (low or high)
let index = if (imm8 & 0x10) == 0 { 0 } else { 1 };
let right = this.read_scalar(&this.project_index(&right, index)?)?.to_u64()?;

// Perform carry-less multiplication
//
// This operation is like long multiplication, but ignores all carries.
// That idea corresponds to the xor operator, which is used in the implementation.
//
// Wikipedia has an example https://en.wikipedia.org/wiki/Carry-less_product#Example
let mut result: u128 = 0;

for i in 0..64 {
// if the i-th bit in right is set
if (right & (1 << i)) != 0 {
// xor result with `left` shifted to the left by i positions
result ^= u128::from(left) << i;
for i in 0..(len / 2) {
let lo = i.strict_mul(2);
let hi = i.strict_mul(2).strict_add(1);

// select the 64-bit integer from left that the user specified (low or high)
let index = if (imm8 & 0x01) == 0 { lo } else { hi };
let left = this.read_scalar(&this.project_index(&left, index)?)?.to_u64()?;

// select the 64-bit integer from right that the user specified (low or high)
let index = if (imm8 & 0x10) == 0 { lo } else { hi };
let right = this.read_scalar(&this.project_index(&right, index)?)?.to_u64()?;

// Perform carry-less multiplication.
//
// This operation is like long multiplication, but ignores all carries.
// That idea corresponds to the xor operator, which is used in the implementation.
//
// Wikipedia has an example https://en.wikipedia.org/wiki/Carry-less_product#Example
let mut result: u128 = 0;

for i in 0..64 {
// if the i-th bit in right is set
if (right & (1 << i)) != 0 {
// xor result with `left` shifted to the left by i positions
result ^= u128::from(left) << i;
}
}
}

let result_low = (result & 0xFFFF_FFFF_FFFF_FFFF) as u64;
let result_high = (result >> 64) as u64;

let dest_low = this.project_index(&dest, 0)?;
this.write_scalar(Scalar::from_u64(result_low), &dest_low)?;

let dest_high = this.project_index(&dest, 1)?;
this.write_scalar(Scalar::from_u64(result_high), &dest_high)?;
let dest = this.project_index(&dest, i)?;
this.write_scalar(Scalar::from_u128(result), &dest)?;
}

interp_ok(())
}
Expand Down
193 changes: 193 additions & 0 deletions src/tools/miri/tests/pass/shims/x86/intrinsics-x86-vpclmulqdq.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
// We're testing x86 target specific features
//@revisions: avx512 avx
//@only-target: x86_64 i686
//@[avx512]compile-flags: -C target-feature=+vpclmulqdq,+avx512f
//@[avx]compile-flags: -C target-feature=+vpclmulqdq,+avx2

// The constants in the tests below are just bit patterns. They should not
// be interpreted as integers; signedness does not make sense for them, but
// __mXXXi happens to be defined in terms of signed integers.
#![allow(overflowing_literals)]
#![feature(avx512_target_feature)]
#![feature(stdarch_x86_avx512)]

#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::mem::transmute;

fn main() {
// Mostly copied from library/stdarch/crates/core_arch/src/x86/vpclmulqdq.rs

assert!(is_x86_feature_detected!("pclmulqdq"));
assert!(is_x86_feature_detected!("vpclmulqdq"));

unsafe {
test_mm256_clmulepi64_epi128();

if is_x86_feature_detected!("avx512f") {
test_mm512_clmulepi64_epi128();
}
}
}

macro_rules! verify_kat_pclmul {
($broadcast:ident, $clmul:ident, $assert:ident) => {
// Constants taken from https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf
let a = _mm_set_epi64x(0x7b5b546573745665, 0x63746f725d53475d);
let a = $broadcast(a);
let b = _mm_set_epi64x(0x4869285368617929, 0x5b477565726f6e5d);
let b = $broadcast(b);
let r00 = _mm_set_epi64x(0x1d4d84c85c3440c0, 0x929633d5d36f0451);
let r00 = $broadcast(r00);
let r01 = _mm_set_epi64x(0x1bd17c8d556ab5a1, 0x7fa540ac2a281315);
let r01 = $broadcast(r01);
let r10 = _mm_set_epi64x(0x1a2bf6db3a30862f, 0xbabf262df4b7d5c9);
let r10 = $broadcast(r10);
let r11 = _mm_set_epi64x(0x1d1e1f2c592e7c45, 0xd66ee03e410fd4ed);
let r11 = $broadcast(r11);

$assert($clmul::<0x00>(a, b), r00);
$assert($clmul::<0x10>(a, b), r01);
$assert($clmul::<0x01>(a, b), r10);
$assert($clmul::<0x11>(a, b), r11);

let a0 = _mm_set_epi64x(0x0000000000000000, 0x8000000000000000);
let a0 = $broadcast(a0);
let r = _mm_set_epi64x(0x4000000000000000, 0x0000000000000000);
let r = $broadcast(r);
$assert($clmul::<0x00>(a0, a0), r);
}
}

// this function tests one of the possible 4 instances
// with different inputs across lanes for the 512-bit version
#[target_feature(enable = "vpclmulqdq,avx512f")]
unsafe fn verify_512_helper(
linear: unsafe fn(__m128i, __m128i) -> __m128i,
vectorized: unsafe fn(__m512i, __m512i) -> __m512i,
) {
let a = _mm512_set_epi64(
0xDCB4DB3657BF0B7D,
0x18DB0601068EDD9F,
0xB76B908233200DC5,
0xE478235FA8E22D5E,
0xAB05CFFA2621154C,
0x1171B47A186174C9,
0x8C6B6C0E7595CEC9,
0xBE3E7D4934E961BD,
);
let b = _mm512_set_epi64(
0x672F6F105A94CEA7,
0x8298B8FFCA5F829C,
0xA3927047B3FB61D8,
0x978093862CDE7187,
0xB1927AB22F31D0EC,
0xA9A5DA619BE4D7AF,
0xCA2590F56884FDC6,
0x19BE9F660038BDB5,
);

let a_decomp = transmute::<_, [__m128i; 4]>(a);
let b_decomp = transmute::<_, [__m128i; 4]>(b);

let r = vectorized(a, b);

let e_decomp = [
linear(a_decomp[0], b_decomp[0]),
linear(a_decomp[1], b_decomp[1]),
linear(a_decomp[2], b_decomp[2]),
linear(a_decomp[3], b_decomp[3]),
];
let e = transmute::<_, __m512i>(e_decomp);

assert_eq_m512i(r, e)
}

// this function tests one of the possible 4 instances
// with different inputs across lanes for the 256-bit version
#[target_feature(enable = "vpclmulqdq")]
unsafe fn verify_256_helper(
linear: unsafe fn(__m128i, __m128i) -> __m128i,
vectorized: unsafe fn(__m256i, __m256i) -> __m256i,
) {
let a = _mm256_set_epi64x(
0xDCB4DB3657BF0B7D,
0x18DB0601068EDD9F,
0xB76B908233200DC5,
0xE478235FA8E22D5E,
);
let b = _mm256_set_epi64x(
0x672F6F105A94CEA7,
0x8298B8FFCA5F829C,
0xA3927047B3FB61D8,
0x978093862CDE7187,
);

let a_decomp = transmute::<_, [__m128i; 2]>(a);
let b_decomp = transmute::<_, [__m128i; 2]>(b);

let r = vectorized(a, b);

let e_decomp = [linear(a_decomp[0], b_decomp[0]), linear(a_decomp[1], b_decomp[1])];
let e = transmute::<_, __m256i>(e_decomp);

assert_eq_m256i(r, e)
}

#[target_feature(enable = "vpclmulqdq,avx512f")]
unsafe fn test_mm512_clmulepi64_epi128() {
verify_kat_pclmul!(_mm512_broadcast_i32x4, _mm512_clmulepi64_epi128, assert_eq_m512i);

verify_512_helper(
|a, b| _mm_clmulepi64_si128::<0x00>(a, b),
|a, b| _mm512_clmulepi64_epi128::<0x00>(a, b),
);
verify_512_helper(
|a, b| _mm_clmulepi64_si128::<0x01>(a, b),
|a, b| _mm512_clmulepi64_epi128::<0x01>(a, b),
);
verify_512_helper(
|a, b| _mm_clmulepi64_si128::<0x10>(a, b),
|a, b| _mm512_clmulepi64_epi128::<0x10>(a, b),
);
verify_512_helper(
|a, b| _mm_clmulepi64_si128::<0x11>(a, b),
|a, b| _mm512_clmulepi64_epi128::<0x11>(a, b),
);
}

#[target_feature(enable = "vpclmulqdq")]
unsafe fn test_mm256_clmulepi64_epi128() {
verify_kat_pclmul!(_mm256_broadcastsi128_si256, _mm256_clmulepi64_epi128, assert_eq_m256i);

verify_256_helper(
|a, b| _mm_clmulepi64_si128::<0x00>(a, b),
|a, b| _mm256_clmulepi64_epi128::<0x00>(a, b),
);
verify_256_helper(
|a, b| _mm_clmulepi64_si128::<0x01>(a, b),
|a, b| _mm256_clmulepi64_epi128::<0x01>(a, b),
);
verify_256_helper(
|a, b| _mm_clmulepi64_si128::<0x10>(a, b),
|a, b| _mm256_clmulepi64_epi128::<0x10>(a, b),
);
verify_256_helper(
|a, b| _mm_clmulepi64_si128::<0x11>(a, b),
|a, b| _mm256_clmulepi64_epi128::<0x11>(a, b),
);
}

#[track_caller]
#[target_feature(enable = "avx512f")]
unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
assert_eq!(transmute::<_, [u64; 8]>(a), transmute::<_, [u64; 8]>(b))
}

#[track_caller]
#[target_feature(enable = "avx")]
unsafe fn assert_eq_m256i(a: __m256i, b: __m256i) {
assert_eq!(transmute::<_, [u64; 4]>(a), transmute::<_, [u64; 4]>(b))
}