Skip to content
Prev Previous commit
Next Next commit
smallfp gcd
  • Loading branch information
z-tech committed Mar 30, 2026
commit 7439b22d868a14f80a3a6a5b70e6294e5c5d4790
128 changes: 104 additions & 24 deletions ff-macros/src/small_fp/montgomery_backend.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::*;
use crate::small_fp::utils::{
compute_large_subgroup_root, compute_two_adic_root_of_unity, compute_two_adicity,
generate_montgomery_bigint_casts, generate_sqrt_precomputation, mod_mul_const,
generate_montgomery_bigint_casts, generate_sqrt_precomputation, mod_mul_const, pow_mod_const,
};
use crate::utils::find_conservative_subgroup_base;

Expand Down Expand Up @@ -60,6 +60,7 @@ pub(crate) fn backend_impl(

// Generate multiplication implementation based on type
let mul_impl = generate_mul_impl(ty, modulus, k_bits, r_mask, n_prime);
let inverse_impl = generate_inverse_impl(ty, modulus, r_mod_n, r2);

let type_bits = match ty_str.as_str() {
"u8" => 8u32,
Expand Down Expand Up @@ -137,6 +138,8 @@ pub(crate) fn backend_impl(

#mul_impl

#inverse_impl

#[inline(always)]
fn sum_of_products<const T: usize>(
a: &[SmallFp<Self>; T],
Expand Down Expand Up @@ -173,29 +176,6 @@ pub(crate) fn backend_impl(
Self::mul_assign(a, &tmp);
}

fn inverse(a: &SmallFp<Self>) -> Option<SmallFp<Self>> {
if a.value == 0 {
return None;
}

let mut result = Self::ONE;
let mut base = *a;
let mut exp = Self::MODULUS - 2;

while exp > 0 {
if exp & 1 == 1 {
Self::mul_assign(&mut result, &base);
}

let mut sq = base;
Self::square_in_place(&mut sq);
base = sq;
exp >>= 1;
}

Some(result)
}

#[inline]
fn new(value: Self::T) -> SmallFp<Self> {
let reduced_value = value % Self::MODULUS;
Expand All @@ -213,6 +193,106 @@ pub(crate) fn backend_impl(
(ts, r_mod_n)
}

// Generates the inverse function using the binary extended GCD algorithm.
//
// The GCD algorithm runs for NUM_ITERS = 2*FIELD_BITS - 2 iterations of cheap integer ops
// (no modular reduction), returning v ≡ 2^NUM_ITERS · (a·R)^{-1} (mod P). A single
// Montgomery multiplication by the precomputed constant C = R^3 · 2^{-NUM_ITERS} mod P
// then corrects the result to a^{-1}·R mod P (the Montgomery form of the inverse).
fn generate_inverse_impl(
ty: &proc_macro2::TokenStream,
modulus: u128,
r_mod_n: u128,
r2: u128,
) -> proc_macro2::TokenStream {
let ty_str = ty.to_string();

// FIELD_BITS = bit-length of modulus; NUM_ITERS = 2*FIELD_BITS - 2 ensures convergence.
let field_bits = 128 - modulus.leading_zeros();
let num_iters = 2 * field_bits - 2;

// Correction constant: C = R^3 · 2^{-NUM_ITERS} mod P
// 2^{-1} mod P = (P+1)/2 (P is odd)
// 2^{-NUM_ITERS} mod P = ((P+1)/2)^NUM_ITERS mod P
let half = (modulus + 1) / 2;
let two_neg_iters = pow_mod_const(half, num_iters as u128, modulus);
let r3 = mod_mul_const(r2, r_mod_n, modulus);
let corr = mod_mul_const(r3, two_neg_iters, modulus);

if ty_str == "u64" {
quote! {
#[inline]
fn inverse(a: &SmallFp<Self>) -> Option<SmallFp<Self>> {
if a.value == 0 {
return None;
}
// Binary extended GCD: v = 2^NUM_ITERS · (a·R)^{-1} mod P
let mut aa: u128 = a.value as u128;
let mut bb: u128 = Self::MODULUS as u128;
let mut u: i128 = 1;
let mut v: i128 = 0;
let mut i = 0u32;
while i < #num_iters {
if aa & 1 != 0 {
if aa < bb {
let tmp_a = aa; aa = bb; bb = tmp_a;
let tmp_u = u; u = v; v = tmp_u;
}
aa -= bb;
u -= v;
}
aa >>= 1;
v <<= 1;
i += 1;
}
let p = Self::MODULUS as i128;
let v_reduced = ((v % p) + p) as u128 % Self::MODULUS as u128;
// Multiply by C = R^3 · 2^{-NUM_ITERS} mod P to get a^{-1}·R mod P
let mut result = SmallFp::from_raw(v_reduced as Self::T);
let corr = SmallFp::from_raw(#corr as Self::T);
Self::mul_assign(&mut result, &corr);
Some(result)
}
}
} else {
// u8, u16, u32: GCD coefficients fit in i64 (|v| ≤ 2^{2*32-2} = 2^62 < 2^63)
quote! {
#[inline]
fn inverse(a: &SmallFp<Self>) -> Option<SmallFp<Self>> {
if a.value == 0 {
return None;
}
// Binary extended GCD: v = 2^NUM_ITERS · (a·R)^{-1} mod P
let mut aa: u64 = a.value as u64;
let mut bb: u64 = Self::MODULUS as u64;
let mut u: i64 = 1;
let mut v: i64 = 0;
let mut i = 0u32;
while i < #num_iters {
if aa & 1 != 0 {
if aa < bb {
let tmp_a = aa; aa = bb; bb = tmp_a;
let tmp_u = u; u = v; v = tmp_u;
}
aa -= bb;
u -= v;
}
aa >>= 1;
v <<= 1;
i += 1;
}
let p = Self::MODULUS as i64;
let v_reduced = ((v % p) + p) as u64 % Self::MODULUS as u64;
// Multiply by C = R^3 · 2^{-NUM_ITERS} mod P to get a^{-1}·R mod P
let mut result = SmallFp::from_raw(v_reduced as Self::T);
let corr = SmallFp::from_raw(#corr as Self::T);
Self::mul_assign(&mut result, &corr);
Some(result)
}
}
}
}

// Selects the appropriate multiplication algorithm at compile time
// by widening to the next-largest primitive type for the product
fn generate_mul_impl(
Expand Down
2 changes: 1 addition & 1 deletion ff-macros/src/small_fp/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ pub(crate) const fn mod_mul_const(a: u128, b: u128, modulus: u128) -> u128 {
}
}

const fn pow_mod_const(mut base: u128, mut exp: u128, modulus: u128) -> u128 {
pub(crate) const fn pow_mod_const(mut base: u128, mut exp: u128, modulus: u128) -> u128 {
let mut result = 1;
base %= modulus;
while exp > 0 {
Expand Down