Skip to content

Commit 1a998d5

Browse files
authored
Merge pull request #62 from CryZe/128-bit-on-more-platforms
Use 128-bit Widening Multiply on More Platforms
2 parents dc5c33f + 6849c16 commit 1a998d5

File tree

1 file changed

+21
-9
lines changed

1 file changed

+21
-9
lines changed

src/lib.rs

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -197,11 +197,26 @@ const PREVENT_TRIVIAL_ZERO_COLLAPSE: u64 = 0xa4093822299f31d0;
197197

198198
#[inline]
199199
fn multiply_mix(x: u64, y: u64) -> u64 {
200-
#[cfg(target_pointer_width = "64")]
201-
{
200+
// The following code path is only fast if 64-bit to 128-bit widening
201+
// multiplication is supported by the architecture. Most 64-bit
202+
// architectures except SPARC64 and Wasm64 support it. However, the target
203+
// pointer width doesn't always indicate that we are dealing with a 64-bit
204+
// architecture, as there are ABIs that reduce the pointer width, especially
205+
// on AArch64 and x86-64. WebAssembly (regardless of pointer width) supports
206+
// 64-bit to 128-bit widening multiplication with the `wide-arithmetic`
207+
// proposal.
208+
if cfg!(any(
209+
all(
210+
target_pointer_width = "64",
211+
not(any(target_arch = "sparc64", target_arch = "wasm64")),
212+
),
213+
target_arch = "aarch64",
214+
target_arch = "x86_64",
215+
all(target_family = "wasm", target_feature = "wide-arithmetic"),
216+
)) {
202217
// We compute the full u64 x u64 -> u128 product; this is a single mul
203218
// instruction on x86-64, one mul plus one mulhi on ARM64.
204-
let full = (x as u128) * (y as u128);
219+
let full = (x as u128).wrapping_mul(y as u128);
205220
let lo = full as u64;
206221
let hi = (full >> 64) as u64;
207222

@@ -216,10 +231,7 @@ fn multiply_mix(x: u64, y: u64) -> u64 {
216231
// x * y = 2^64 * hi + lo = (-1) * hi + lo = lo - hi, (mod 2^64 + 1)
217232
// x * y = 2^64 * hi + lo = 1 * hi + lo = lo + hi, (mod 2^64 - 1)
218233
// Multiplicative hashing is universal in a field (like mod p).
219-
}
220-
221-
#[cfg(target_pointer_width = "32")]
222-
{
234+
} else {
223235
// u64 x u64 -> u128 product is prohibitively expensive on 32-bit and on other targets without a native widening multiply.
224236
// Decompose into 32-bit parts.
225237
let lx = x as u32;
@@ -228,8 +240,8 @@ fn multiply_mix(x: u64, y: u64) -> u64 {
228240
let hy = (y >> 32) as u32;
229241

230242
// u32 x u32 -> u64: multiply the low bits of one with the high bits of the other.
231-
let afull = (lx as u64) * (hy as u64);
232-
let bfull = (hx as u64) * (ly as u64);
243+
let afull = (lx as u64).wrapping_mul(hy as u64);
244+
let bfull = (hx as u64).wrapping_mul(ly as u64);
233245

234246
// Combine, swapping low/high of one of them so the upper bits of the
235247
// product of one combine with the lower bits of the other.

0 commit comments

Comments
 (0)