Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add __float128 support for Midpoint754
  • Loading branch information
ryanelandt committed Aug 18, 2023
commit 21dc82bdc678b1d857c18794c8c3b7ed60374d30
119 changes: 86 additions & 33 deletions include/boost/math/tools/roots.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
#include <boost/math/tools/toms748_solve.hpp>
#include <boost/math/policies/error_handling.hpp>

#ifdef BOOST_HAS_FLOAT128
#include <boost/multiprecision/float128.hpp>
using boost::multiprecision::float128;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As above, we shouldn't be forcing multiprecision headers to be included here, plus the using declaration at global header scope is evil ;)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This header and using declaration was unneeded. Good catch.

#endif

namespace boost {
namespace math {
namespace tools {
Expand Down Expand Up @@ -273,6 +278,9 @@ namespace Bisection {
static_assert(std::is_unsigned<U>::value, "U must be an unsigned integer type.");
static_assert(sizeof(T) == sizeof(U), "Type and uint size must be the same.");

// The sign bit is 1, all other bits are 0
static constexpr U sign_mask() { return U(1) << (sizeof(U) * 8 - 1); }

// Convert float to bits
static U float_to_uint(T x) {
U bits;
Expand All @@ -287,47 +295,73 @@ namespace Bisection {
return x;
}

// Avoids linking against libquadmath for __float128
static T fabs_imp(T x) {
U bits_x = float_to_uint(x);
bits_x = bits_x & ~sign_mask(); // Zero the sign bit
return uint_to_float(bits_x);
}

public:
static T solve(T x, T y) {
using std::fabs;

// Sort so that y has the larger magnitude
if (fabs(y) < fabs(x)) {
if (fabs_imp(y) < fabs_imp(x)) {
std::swap(x, y);
}

const T x_mag = std::fabs(x);
const T y_mag = std::fabs(y);
const T sign_x = sign(x);
const T sign_y = sign(y);

// Convert the magnitudes to bits
U bits_mag_x = float_to_uint(x_mag);
U bits_mag_y = float_to_uint(y_mag);
const U bits_x = float_to_uint(x);
const U bits_y = float_to_uint(y);

const U sign_x = bits_x & sign_mask();
const U sign_y = bits_y & sign_mask();

const U mag_x = bits_x & ~sign_mask();
const U mag_y = bits_y & ~sign_mask();

// Calculate the average magnitude in bits
U bits_mag = (sign_x == sign_y) ? (bits_mag_y + bits_mag_x) : (bits_mag_y - bits_mag_x);
U bits_mag = (sign_x == sign_y) ? (mag_y + mag_x) : (mag_y - mag_x);
bits_mag = bits_mag >> 1; // Divide by 2

// Reconstruct upl_mean from average magnitude and sign of y
return uint_to_float(bits_mag) * sign_y;
bits_mag = bits_mag | sign_y; // Make the sign of bits_mag match the sign of y

return uint_to_float(bits_mag);
}

// NOTE: boost::multiprecision::float128 is cast to __float128
template <typename V>
static V solve(V x, V y) { return solve(static_cast<T>(x), static_cast<T>(y)); }

// Must evaluate to true in order to bisect correctly with infinity
// Ideally this should be a static assert.
static bool is_one_plus_max_bits_inf() {
const U bits_max = float_to_uint((std::numeric_limits<T>::max)());
const U bits_one_plus_max = bits_max + 1;
return uint_to_float(bits_one_plus_max) == std::numeric_limits<T>::infinity();
}

using type_float = T; // Used for unit tests
}; // class Midpoint754


template <typename T>
class MidpointNon754 {
private:
// NOTE: The Midpoint754 solver is faster than this solver and should be used when possible.
// To use the Midpoint754 solver, two criteria must be satisfied:
// 1. The type T must conform to the IEEE 754 standard and
// 2. There must exist a corresponding unsigned integer type with the same number of bits.
// Currently, this limits the use of the Midpoint754 solver to `float` and `double`. The
// lack of a 128 bit unsigned integer type prevents the Midpoint754 solver being used for
// `long double`.
// NOTE: The Midpoint754 solver should be used when possible because it is faster
// than this solver. The two criteria below must be satifsied to use the Midpoint754
// solver:
// 1. The type T must conform to the IEEE 754 standard and
// 2. The following sequence of steps must produce `numeric_limits<T>::infinity`.
// Start with `numeric_limits<T>::max()`. Then reinterpret this value as an
// unsigned integer. Next add one to this unsigned integer. Finally reinterpret
// the result as type T. This result must equal infinity.
// The above two criteria are true for the following datatypes: `float`, `double`,
// and `__float128`. The 80 bit variation of `long double` does not satisfy the
// second criteria.
static_assert(!std::is_same<T, float>::value, "Need to use Midpoint754 solver when T is float");
static_assert(!std::is_same<T, double>::value, "Need to use Midpoint754 solver when T is double");
// static_assert(!std::is_same<T, long double>::value -- (See NOTE above)
#if defined(BOOST_HAS_INT128) && defined(BOOST_HAS_FLOAT128)
static_assert(!std::is_same<T, __float128>::value, "Need to use Midpoint754 solver when T is __float128");
#endif

public:
static T solve(T x, T y) {
Expand Down Expand Up @@ -422,6 +456,9 @@ namespace Bisection {
const U denorm_min = std::numeric_limits<U>::denorm_min();
if (denorm_min != 0) { return denorm_min; }

// NOTE: the two lines below can be removed when
// https://github.com/boostorg/multiprecision/pull/562
// gets merged.
const U min = (std::numeric_limits<U>::min)();
if (min != 0) { return min; }

Expand Down Expand Up @@ -467,16 +504,32 @@ namespace Bisection {
}
}; // class MidpointNon754

template <typename T>
static T calc_midpoint(T x, T y) {
return MidpointNon754<T>::solve(x, y);
}
static float calc_midpoint(float x, float y) {
return Midpoint754<float, std::uint32_t>::solve(x, y);
}
static double calc_midpoint(double x, double y) {
return Midpoint754<double, std::uint64_t>::solve(x, y);
}
// The purposes of this class is to not cause compiler warnings from unused functions.
class CalcMidpoint {
public:
template <typename T>
static MidpointNon754<T> get_solver(T) {
return MidpointNon754<T>();
}
static Midpoint754<float, std::uint32_t> get_solver(float) {
return Midpoint754<float, std::uint32_t>();
}
static Midpoint754<double, std::uint64_t> get_solver(double) {
return Midpoint754<double, std::uint64_t>();
}
#if defined(BOOST_HAS_INT128) && defined(BOOST_HAS_FLOAT128)
static Midpoint754<__float128, boost::uint128_type> get_solver(__float128) {
return Midpoint754<__float128, boost::uint128_type>();
}
static Midpoint754<__float128, boost::uint128_type> get_solver(boost::multiprecision::float128) {
return Midpoint754<__float128, boost::uint128_type>();
}
#endif
template <typename T>
static T calc_midpoint(T x, T y) {
return get_solver(x).solve(x, y);
}
};

} // namespace Bisection
} // namespace detail
Expand Down Expand Up @@ -549,7 +602,7 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t&
{
// Last two steps haven't converged.
const T x_other = (delta > 0) ? min : max;
delta = result - detail::Bisection::calc_midpoint(result, x_other);
delta = result - detail::Bisection::CalcMidpoint::calc_midpoint(result, x_other);
// reset delta1/2 so we don't take this branch next time round:
delta1 = 3 * delta;
delta2 = 3 * delta;
Expand Down
159 changes: 97 additions & 62 deletions test/test_roots.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
#include <boost/multiprecision/cpp_complex.hpp>
#include <boost/math/concepts/real_concept.hpp>

#ifdef BOOST_HAS_FLOAT128
#include <boost/multiprecision/float128.hpp>
#endif

#if __has_include(<stdfloat>)
# include <stdfloat>
#endif
Expand Down Expand Up @@ -655,93 +659,124 @@ void test_failures()
#endif
}

// Creates a vector of test values that include: zero, denorm_min, min, epsilon, max,
// and infinity. Also includes powers of 2 ranging from 2^-15 to 2^+15 by powers of 3.
template <typename T>
void test_bisect() {
// Creates a vector of test values that include: zero, denorm_min, min, epsilon, max,
// and infinity. Also includes powers of 2 ranging from 2^-15 to 2^+15 by powers of 3.
const auto fn_create_test_ladder = [](){
std::vector<T> v;

const auto fn_push_back_x_scaled = [&v](const T& x) {
const auto fn_add_if_non_zero_and_finite = [&v](const T& x) {
if (x != 0 && boost::math::isfinite(x)) { // Avoids most duplications
v.push_back(x);
}
};

const T two_thirds = T(2) / T(3);
const T four_thirds = T(4) / T(3);

v.push_back(x);
fn_add_if_non_zero_and_finite(x * T(0.5));
fn_add_if_non_zero_and_finite(x * two_thirds);
fn_add_if_non_zero_and_finite(x * four_thirds);
fn_add_if_non_zero_and_finite(x * T(2.0));
};

const auto fn_push_back_pm = [&](std::vector<T>& v, const T& x) {
fn_push_back_x_scaled( x);
fn_push_back_x_scaled(-x);
};

fn_push_back_pm(v, T(0.0));
fn_push_back_pm(v, std::numeric_limits<T>::denorm_min());
fn_push_back_pm(v, (std::numeric_limits<T>::min)());
fn_push_back_pm(v, std::numeric_limits<T>::epsilon());
const int test_exp_range = 5;
for (int i = -test_exp_range; i < (test_exp_range + 1); ++i) {
const int exponent = i * 3;
const T x = std::ldexp(1.0, exponent);
fn_push_back_pm(v, x);
}
fn_push_back_pm(v, (std::numeric_limits<T>::max)());
std::vector<T> create_test_ladder() {
std::vector<T> v;

const auto fn_push_back_x_scaled = [&v](const T& x) {
const T two_thirds = T(2) / T(3);
const T four_thirds = T(4) / T(3);

v.push_back(x);
v.push_back(x * T(0.5));
v.push_back(x * two_thirds);
v.push_back(x * four_thirds);
v.push_back(x * T(2.0));
};

// Real_concept doesn't have infinity
if (std::numeric_limits<T>::has_infinity) {
fn_push_back_pm(v, std::numeric_limits<T>::infinity());
}
const auto fn_push_back_pm = [&](std::vector<T>& v, const T& x) {
fn_push_back_x_scaled( x);
fn_push_back_x_scaled(-x);
};

std::sort(v.begin(), v.end());
fn_push_back_pm(v, std::numeric_limits<T>::denorm_min());
fn_push_back_pm(v, (std::numeric_limits<T>::min)());
fn_push_back_pm(v, std::numeric_limits<T>::epsilon());

const int num_zeros = std::count(v.begin(), v.end(), T(0.0));
BOOST_CHECK_LE(2, num_zeros); // Positive and negative zero
const int test_exp_range = 5;
for (int i = -test_exp_range; i < (test_exp_range + 1); ++i) {
const int exponent = i * 3;
const T x = std::ldexp(1.0, exponent);
fn_push_back_pm(v, x);
}
fn_push_back_pm(v, (std::numeric_limits<T>::max)());
fn_push_back_pm(v, std::numeric_limits<T>::infinity());

return v;
};
// Take unique elements of v
std::sort(v.begin(), v.end());
v.erase(std::unique(v.begin(), v.end()), v.end());

// Add both positive and negative zero
v.push_back(T(-0.0));
v.push_back(T(+0.0));

const int num_zeros = std::count(v.begin(), v.end(), T(0.0));
BOOST_CHECK_LE(2, num_zeros); // Check for both positive and negative zero

return v;
};

template <typename T, typename S>
void test_bisect_non(S solver) {
// Test for all combinations from the ladder
auto v = fn_create_test_ladder();
auto v = create_test_ladder<T>();

for (const T& x_i: v) {
for (const T& x_j: v) {
const T x_mid_ij = boost::math::tools::detail::Bisection::calc_midpoint(x_i, x_j);
const T x_mid_ij = solver.solve(x_i, x_j);

const T x_lo = (std::min)(x_i, x_j);
const T x_hi = (std::max)(x_i, x_j);

BOOST_CHECK_LE(x_lo, x_mid_ij);
BOOST_CHECK_LE(x_mid_ij, x_hi);
BOOST_CHECK(x_lo <= x_mid_ij); // NOTE: BOOST_CHECK_LE(x_lo, x_mid_ij) fails to link
BOOST_CHECK(x_mid_ij <= x_hi); // NOTE: BOOST_CHECK_LE(x_mid_ij, x_hi) fails to link
}
}
}

template <typename T>
void test_bisect_non() {
const auto solver = boost::math::tools::detail::Bisection::CalcMidpoint::get_solver(T(1)); // Input value unused
test_bisect_non<T,decltype(solver)>(solver);
}

template <typename S>
void test_bisect_754(S solver) {
BOOST_MATH_ASSERT(solver.is_one_plus_max_bits_inf()); // Catch infinity misbehavior for 80 bit `long double`
// before it causes downstream failures
test_bisect_non<typename S::type_float,S>(solver);
}

template <typename T>
void test_bisect_754() {
const auto solver = boost::math::tools::detail::Bisection::CalcMidpoint::get_solver(T(1)); // Input value unused
test_bisect_754(solver);
}

void test_long_double_fail() {
const auto solver = boost::math::tools::detail::Bisection::Midpoint754<long double,boost::uint128_type>();
BOOST_CHECK(!solver.is_one_plus_max_bits_inf());
}

void test_bisect_all_cases() {
test_bisect<float>();
test_bisect<double>();
test_bisect<long double>();
test_bisect<boost::math::concepts::real_concept>();
test_bisect<boost::multiprecision::cpp_bin_float_50>();
test_bisect<boost::multiprecision::cpp_bin_float_100>();
test_bisect<boost::multiprecision::cpp_dec_float_50>();
test_bisect<boost::multiprecision::cpp_dec_float_100>();
test_bisect_754<float>();
test_bisect_754<double>();
test_bisect_non<long double>();
test_bisect_non<boost::math::concepts::real_concept>();
test_bisect_non<boost::multiprecision::cpp_bin_float_50>();
test_bisect_non<boost::multiprecision::cpp_bin_float_100>();
test_bisect_non<boost::multiprecision::cpp_dec_float_50>();
test_bisect_non<boost::multiprecision::cpp_dec_float_100>();

#if defined(BOOST_HAS_FLOAT128) && defined(BOOST_HAS_INT128)
test_bisect_754<boost::multiprecision::float128>();
test_bisect_754<__float128>();
#endif

#if __has_include(<stdfloat>)
#ifdef __STDCPP_FLOAT32_T__
test_bisect<std::float32_t>();
test_bisect_754<std::float32_t>();
#endif
#ifdef __STDCPP_FLOAT64_T__
test_bisect<std::float64_t>();
test_bisect_754<std::float64_t>();
#endif
#endif

#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && defined(BOOST_HAS_INT128)
test_long_double_fail();
#endif
}

void test_count() {
Expand Down