From 43d454da0202e5bdc66297b5492bc5c8cb69a848 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Thu, 9 Jan 2020 08:32:50 -0800 Subject: [PATCH 01/10] Refactor number parsing tests --- include/boost/json/detail/string_impl.hpp | 2 +- test/_detail_number.cpp | 356 ++++++++++++++-------- 2 files changed, 233 insertions(+), 125 deletions(-) diff --git a/include/boost/json/detail/string_impl.hpp b/include/boost/json/detail/string_impl.hpp index a4d90067f..80e1823ca 100644 --- a/include/boost/json/detail/string_impl.hpp +++ b/include/boost/json/detail/string_impl.hpp @@ -58,7 +58,7 @@ class string_impl struct sbo { - kind k; + kind k; // must come first char buf[sbo_chars_ + 1]; }; diff --git a/test/_detail_number.cpp b/test/_detail_number.cpp index b74aa3dc4..91e72e50e 100644 --- a/test/_detail_number.cpp +++ b/test/_detail_number.cpp @@ -10,6 +10,8 @@ // Test that header file is self-contained. #include +#include +#include #include #include "test_suite.hpp" @@ -18,10 +20,13 @@ namespace boost { namespace json { namespace detail { -class number_parser_test +class number_test { public: + test_suite::log_type log; + template + static void grind( string_view s, @@ -71,6 +76,49 @@ class number_parser_test } } + //------------------------------------------------------ + + void + testMembers() + { + // maybe_init + { + number_parser p; + BOOST_TEST(! p.maybe_init(0)); + BOOST_TEST(! p.maybe_init('A')); + BOOST_TEST(! p.maybe_init('a')); + BOOST_TEST(! p.maybe_init('.')); + BOOST_TEST(! p.maybe_init('!')); + BOOST_TEST(! p.maybe_init(' ')); + BOOST_TEST(p.maybe_init('0')); p.reset(); + BOOST_TEST(p.maybe_init('1')); p.reset(); + BOOST_TEST(p.maybe_init('2')); p.reset(); + BOOST_TEST(p.maybe_init('3')); p.reset(); + BOOST_TEST(p.maybe_init('4')); p.reset(); + BOOST_TEST(p.maybe_init('5')); p.reset(); + BOOST_TEST(p.maybe_init('6')); p.reset(); + BOOST_TEST(p.maybe_init('7')); p.reset(); + BOOST_TEST(p.maybe_init('8')); p.reset(); + BOOST_TEST(p.maybe_init('9')); p.reset(); + BOOST_TEST(p.maybe_init('0')); p.reset(); + BOOST_TEST(p.maybe_init('-')); p.reset(); + } + + // finish + { + error_code ec; + number_parser p; + p.write_some("0x", 2, ec); + if(BOOST_TEST(! ec)) + { + p.finish(ec); + BOOST_TEST(! ec); + } + } + } + + //------------------------------------------------------ + void check_bad(string_view s) { @@ -127,26 +175,8 @@ class number_parser_test } void - check_double( - string_view s, - double d) - { - grind(s, - [&](detail::number num) - { - if( BOOST_TEST( - num.kind == kind::double_)) - BOOST_TEST(num.d == d); - }); - } - - void - testParse() + testIntegers() { - check_double("-999999999999999999999", -999999999999999999999.0); - check_double("-100000000000000000009", -100000000000000000009.0); - check_double("-10000000000000000000", -10000000000000000000.0); - check_double("-9223372036854775809", -9223372036854775809.0); check_int64( "-9223372036854775808", INT64_MIN); check_int64( "-9223372036854775807", -9223372036854775807); check_int64( "-999999999999999999", -999999999999999999); @@ -187,83 +217,15 @@ class number_parser_test check_int64( "99999999999999999", 99999999999999999); check_int64( "999999999999999999", 999999999999999999); check_int64( "9223372036854775807", INT64_MAX); + check_uint64( "9223372036854775808", 9223372036854775808ULL); check_uint64( "9999999999999999999", 9999999999999999999ULL); check_uint64( "18446744073709551615", UINT64_MAX); - check_double( "18446744073709551616", 18446744073709551616.0); - check_double( "99999999999999999999", 99999999999999999999.0); - check_double( "999999999999999999999", 999999999999999999999.0); - check_double( "1000000000000000000000", 1000000000000000000000.0); - check_double( "9999999999999999999999", 9999999999999999999999.0); - check_double( "99999999999999999999999", 99999999999999999999999.0); - - check_double("-0.9999999999999999999999", -1.0000000000000002); - check_double("-0.9999999999999999", -1.0000000000000000); - check_double("-0.9007199254740991", -0.9007199254740991); // (2^53-1) - check_double("-0.999999999999999", -0.99999999999999911); - check_double("-0.99999999999999", -0.99999999999999001); - check_double("-0.9999999999999", -0.99999999999990008); - check_double("-0.999999999999", -0.99999999999900002); - check_double("-0.99999999999", -0.99999999998999989); - check_double("-0.9999999999", -0.99999999989999999); - check_double("-0.999999999", -0.99999999900000003); - check_double("-0.99999999", -0.99999999000000006); - check_double("-0.9999999", -0.99999989999999994); - check_double("-0.999999", -0.999999); - check_double("-0.99999", -0.99999); - check_double("-0.9999", -0.9999); - check_double("-0.8125", -0.8125); - check_double("-0.999", -0.999); - check_double("-0.99", -0.99); - check_double("-1.0", -1); - check_double("-0.9", -0.9); - check_double("-0.0", 0); - check_double( "0.0", 0); - check_double( "0.9", 0.9); - check_double( "0.99", 0.99); - check_double( "0.999", 0.999); - check_double( "0.8125", 0.8125); - check_double( "0.9999", 0.9999); - check_double( "0.99999", 0.99999); - check_double( "0.999999", 0.999999); - check_double( "0.9999999", 0.99999989999999994); - check_double( "0.99999999", 0.99999999000000006); - check_double( "0.999999999", 0.99999999900000003); - check_double( "0.9999999999", 0.99999999989999999); - check_double( "0.99999999999", 0.99999999998999989); - check_double( "0.999999999999", 0.99999999999900002); - check_double( "0.9999999999999", 0.99999999999990008); - check_double( "0.99999999999999", 0.99999999999999001); - check_double( "0.999999999999999", 0.99999999999999911); - check_double( "0.9007199254740991", 0.9007199254740991); // (2^53-1) - check_double( "0.9999999999999999", 1.0000000000000000); - check_double( "0.9999999999999999999999", 1.0000000000000002); - check_double( "0.999999999999999999999999999", 1.0000000000000002); - - check_double("-1e308", -1e308); - check_double("-1e-308", -1e-308); - check_double("-9999e300", -9999e300); - check_double("-999e100", -999e100); - check_double("-99e10", -99e10); - check_double("-9e1", -9e1); - check_double( "9e1", 9e1); - check_double( "99e10", 99e10); - check_double( "999e100", 999e100); - check_double( "9999e300", 9999e300); - check_double( "999999999999999999.0", 999999999999999999.0); - check_double( "999999999999999999999.0", 999999999999999999999.0); - check_double( "999999999999999999999e5", 999999999999999999999e5); - check_double( "999999999999999999999.0e5", 999999999999999999999.0e5); - - check_double( "0.00000000000000001", 0.00000000000000001); - - check_double("-1e-1", -1e-1); - check_double("-1e0", -1); - check_double("-1e1", -1e1); - check_double( "0e0", 0); - check_double( "1e0", 1); - check_double( "1e10", 1e10); + } + void + testBad() + { check_bad(""); check_bad("x"); check_bad("00"); @@ -287,54 +249,200 @@ class number_parser_test check_bad("-x"); } - void - testMembers() + //------------------------------------------------------ + + struct f_boost { - // maybe_init + static + string_view + name() noexcept { - number_parser p; - BOOST_TEST(! p.maybe_init(0)); - BOOST_TEST(! p.maybe_init('A')); - BOOST_TEST(! p.maybe_init('a')); - BOOST_TEST(! p.maybe_init('.')); - BOOST_TEST(! p.maybe_init('!')); - BOOST_TEST(! p.maybe_init(' ')); - BOOST_TEST(p.maybe_init('0')); p.reset(); - BOOST_TEST(p.maybe_init('1')); p.reset(); - BOOST_TEST(p.maybe_init('2')); p.reset(); - BOOST_TEST(p.maybe_init('3')); p.reset(); - BOOST_TEST(p.maybe_init('4')); p.reset(); - BOOST_TEST(p.maybe_init('5')); p.reset(); - BOOST_TEST(p.maybe_init('6')); p.reset(); - BOOST_TEST(p.maybe_init('7')); p.reset(); - BOOST_TEST(p.maybe_init('8')); p.reset(); - BOOST_TEST(p.maybe_init('9')); p.reset(); - BOOST_TEST(p.maybe_init('0')); p.reset(); - BOOST_TEST(p.maybe_init('-')); p.reset(); + return "boost"; } - // finish + double + operator()(string_view s) const { error_code ec; number_parser p; - p.write_some("0x", 2, ec); - if(BOOST_TEST(! ec)) - { - p.finish(ec); - BOOST_TEST(! ec); - } + p.write(s.data(), s.size(), ec); + if(ec) + BOOST_THROW_EXCEPTION( + system_error(ec)); + BOOST_TEST(p.is_done()); + auto const num = p.get(); + BOOST_ASSERT( + num.kind == kind::double_); + + grind(s, + [&](detail::number num1) + { + if( BOOST_TEST( + num1.kind == kind::double_)) + BOOST_TEST(num1.d == num.d); + }); + + return num.d; + } + }; + + // Verify that f converts to the + // same double produced by `strtod`. + // Requires `s` does not fit in an integral type. + template + void + fcheck(std::string const& s, F const& f) + { + char* str_end; + double const need = + std::strtod(s.c_str(), &str_end); + BOOST_TEST(str_end == &s.back() + 1); + double const got = f(s); + if(! BOOST_TEST(got == need)) + { + std::uint64_t uneed; + std::uint64_t ugot; + std::memcpy(&uneed, &need, sizeof(need)); + std::memcpy(&ugot, &got, sizeof(got)); + log << + std::hex << + std::setprecision( + std::numeric_limits< + double>::max_digits10) << + f.name() << "\n" + "string: " << s << "\n" + "need : " << need << " (0x" << uneed << ")\n" + "got : " << got << " (0x" << ugot << ")" << + std::endl; } } + template + void + check_numbers(F const& f) + { + auto const fc = + [&f, this](std::string const& s) + { + fcheck(s, f); + }; + + fc( "-999999999999999999999" ); + fc( "-100000000000000000009"); + fc( "-10000000000000000000" ); + fc( "-9223372036854775809" ); + + fc( "18446744073709551616" ); + fc( "99999999999999999999" ); + fc( "999999999999999999999" ); + fc( "1000000000000000000000" ); + fc( "9999999999999999999999" ); + fc( "99999999999999999999999" ); + + fc("-0.9999999999999999999999" ); + fc("-0.9999999999999999" ); + fc("-0.9007199254740991" ); + fc("-0.999999999999999" ); + fc("-0.99999999999999" ); + fc("-0.9999999999999" ); + fc("-0.999999999999" ); + fc("-0.99999999999" ); + fc("-0.9999999999" ); + fc("-0.999999999" ); + fc("-0.99999999" ); + fc("-0.9999999" ); + fc("-0.999999" ); + fc("-0.99999" ); + fc("-0.9999" ); + fc("-0.8125" ); + fc("-0.999" ); + fc("-0.99" ); + fc("-1.0" ); + fc("-0.9" ); + fc("-0.0" ); + fc( "0.0" ); + fc( "0.9" ); + fc( "0.99" ); + fc( "0.999" ); + fc( "0.8125" ); + fc( "0.9999" ); + fc( "0.99999" ); + fc( "0.999999" ); + fc( "0.9999999" ); + fc( "0.99999999" ); + fc( "0.999999999" ); + fc( "0.9999999999" ); + fc( "0.99999999999" ); + fc( "0.999999999999" ); + fc( "0.9999999999999" ); + fc( "0.99999999999999" ); + fc( "0.999999999999999" ); + fc( "0.9007199254740991" ); + fc( "0.9999999999999999" ); + fc( "0.9999999999999999999999" ); + fc( "0.999999999999999999999999999" ); + + fc("-1e308" ); + fc("-1e-308" ); + fc("-9999e300" ); + fc("-999e100" ); + fc("-99e10" ); + fc("-9e1" ); + fc( "9e1" ); + fc( "99e10" ); + fc( "999e100" ); + fc( "9999e300" ); + fc( "999999999999999999.0" ); + fc( "999999999999999999999.0" ); + fc( "999999999999999999999e5" ); + fc( "999999999999999999999.0e5" ); + + fc( "0.00000000000000001" ); + + fc("-1e-1" ); + fc("-1e0" ); + fc("-1e1" ); + fc( "0e0" ); + fc( "1e0" ); + fc( "1e10" ); + + fc( + "0." + "00000000000000000000000000000000000000000000000000" // 50 zeroes + "1e50" ); + + fc( + "0." + "00000000000000000000000000000000000000000000000000" // 50 zeroes + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "1e600" ); + } + + void + testDoubles() + { + check_numbers(f_boost{}); + } + void run() { - testParse(); testMembers(); + testIntegers(); + testBad(); + testDoubles(); } }; -TEST_SUITE(number_parser_test, "boost.json.detail.number_parser"); +TEST_SUITE(number_test, "boost.json.detail.number"); } // detail } // json From 2a44133921fcf9f7013ba6639460c28e0a54c690 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Thu, 9 Jan 2020 11:04:06 -0800 Subject: [PATCH 02/10] Tests and number parsing fixes --- include/boost/json/detail/impl/number.ipp | 33 ++++--- include/boost/json/detail/number.hpp | 3 +- test/_detail_number.cpp | 114 ++++++++++++++++++++-- test/serializer.cpp | 18 ++++ 4 files changed, 148 insertions(+), 20 deletions(-) diff --git a/include/boost/json/detail/impl/number.ipp b/include/boost/json/detail/impl/number.ipp index a39130242..53cecbe4d 100644 --- a/include/boost/json/detail/impl/number.ipp +++ b/include/boost/json/detail/impl/number.ipp @@ -166,9 +166,9 @@ loop: // [0,1..9] case state::init0: { + BOOST_ASSERT(neg_); BOOST_ASSERT( n_.kind == kind::int64); - BOOST_ASSERT(neg_); if(p >= p1) break; unsigned char const d = *p - '0'; @@ -209,7 +209,7 @@ loop: st_ = state::frac1; goto loop; } - // zero + // just a zero n_.u = 0; st_ = state::end; goto finish; @@ -219,10 +219,11 @@ loop: // *[0..9] case state::mant: { + BOOST_ASSERT(! neg_); + BOOST_ASSERT(dig_ > 0); BOOST_ASSERT( n_.kind == kind::int64); - BOOST_ASSERT(! neg_); - if(p < p1) + if(p < p1) // VFALCO see if the compiler can do this for us { auto m = n_.u; do @@ -234,6 +235,8 @@ loop: if( m > 1844674407370955161 || ( m == 1844674407370955161 && d > 5)) { + // VFALCO Conversion to double may + // require intelligent rounding ++p; n_.d = static_cast(m) * 10; n_.kind = kind::double_; @@ -260,6 +263,7 @@ loop: st_ = state::exp1; goto loop; } + // reached end of number n_.u = m; finish(ec); goto finish; @@ -273,9 +277,10 @@ loop: // *[0..9] (negative) case state::mantn: { - BOOST_ASSERT(n_.kind == kind::int64); BOOST_ASSERT(neg_); - if(p < p1) + BOOST_ASSERT(dig_ > 0); + BOOST_ASSERT(n_.kind == kind::int64); + if(p < p1) // VFALCO see if the compiler can do this for us { auto m = n_.u; do @@ -287,6 +292,9 @@ loop: if( m > 922337203685477580 || ( m == 922337203685477580 && d > 8)) { + // VFALCO Conversion to double may + // require intelligent rounding. + // Need to do the right thing for neg_ == true n_.d = static_cast(m); n_.kind = kind::double_; st_ = state::mantd; @@ -300,6 +308,7 @@ loop: if(*p == '.') { ++p; + n_.u = m; st_ = state::frac1; goto loop; } @@ -311,9 +320,9 @@ loop: st_ = state::exp1; goto loop; } - n_.i = static_cast< - int64_t>(~n_.u+1); - st_ = state::end; + // reached end of number + n_.u = m; + finish(ec); goto finish; } while(p < p1); @@ -583,9 +592,10 @@ finish( break; case state::mant: + BOOST_ASSERT(! neg_); + BOOST_ASSERT(dig_ > 0); BOOST_ASSERT( n_.kind == kind::int64); - BOOST_ASSERT(! neg_); //ec = {}; if(n_.u <= INT64_MAX) n_.i = static_cast< @@ -596,9 +606,10 @@ finish( break; case state::mantn: + BOOST_ASSERT(neg_); + BOOST_ASSERT(dig_ > 0); BOOST_ASSERT( n_.kind == kind::int64); - BOOST_ASSERT(neg_); //ec = {}; n_.i = static_cast< int64_t>(~n_.u+1); diff --git a/include/boost/json/detail/number.hpp b/include/boost/json/detail/number.hpp index 96ec24a08..856ebffea 100644 --- a/include/boost/json/detail/number.hpp +++ b/include/boost/json/detail/number.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include namespace boost { @@ -46,7 +47,7 @@ class number_parser number n_; short exp_; - short dig_; + short dig_; // significant digits in mantissa short off_; bool neg_; bool eneg_; diff --git a/test/_detail_number.cpp b/test/_detail_number.cpp index 91e72e50e..e9d46f6d0 100644 --- a/test/_detail_number.cpp +++ b/test/_detail_number.cpp @@ -20,6 +20,28 @@ namespace boost { namespace json { namespace detail { +bool +operator==( + number const& lhs, + number const& rhs) noexcept +{ + if(lhs.kind != rhs.kind) + return false; + switch(lhs.kind) + { + case json::kind::int64: + return lhs.i == rhs.i; + case json::kind::uint64: + return lhs.u == rhs.u; + default: + break; + } + return + std::signbit(lhs.d) == + std::signbit(rhs.d) && + lhs.d == rhs.d; +} + class number_test { public: @@ -152,7 +174,7 @@ class number_test int64_t i) { grind(s, - [&](detail::number num) + [&](number num) { if( BOOST_TEST( num.kind == kind::int64)) @@ -166,7 +188,7 @@ class number_test uint64_t u) { grind(s, - [&](detail::number num) + [&](number num) { if( BOOST_TEST( num.kind == kind::uint64)) @@ -275,7 +297,7 @@ class number_test num.kind == kind::double_); grind(s, - [&](detail::number num1) + [&](number num1) { if( BOOST_TEST( num1.kind == kind::double_)) @@ -406,16 +428,26 @@ class number_test fc( "1e0" ); fc( "1e10" ); - fc( - "0." + fc( "0." "00000000000000000000000000000000000000000000000000" // 50 zeroes "1e50" ); - - fc( - "0." + fc( "-0." "00000000000000000000000000000000000000000000000000" // 50 zeroes + "1e50" ); + + fc("0." + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" "00000000000000000000000000000000000000000000000000" "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" // 500 zeroes + "1e600" ); + fc("-0." "00000000000000000000000000000000000000000000000000" "00000000000000000000000000000000000000000000000000" "00000000000000000000000000000000000000000000000000" @@ -423,7 +455,24 @@ class number_test "00000000000000000000000000000000000000000000000000" "00000000000000000000000000000000000000000000000000" "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" // 500 zeroes "1e600" ); + + fc( "0e" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" // 500 zeroes + ); + } void @@ -432,6 +481,54 @@ class number_test check_numbers(f_boost{}); } + static + number + int64_num(int64_t i) noexcept + { + number num; + num.i = i; + num.kind = kind::int64; + return num; + } + + static + number + uint64_num(uint64_t u) noexcept + { + number num; + num.u = u; + num.kind = kind::uint64; + return num; + } + + static + number + double_num(double d) noexcept + { + number num; + num.d = d; + num.kind = kind::double_; + return num; + } + + void + testEdgeCases() + { + auto const parse = + [&](string_view s) + { + error_code ec; + number_parser p; + p.write(s.data(), s.size(), ec); + BOOST_TEST(! ec); + return p.get(); + }; + + BOOST_TEST(parse("-0.0") == double_num(-0.0)); + BOOST_TEST(parse("-0E0") == double_num(-0.0)); + BOOST_TEST(parse("-0") == int64_num(0)); + } + void run() { @@ -439,6 +536,7 @@ class number_test testIntegers(); testBad(); testDoubles(); + testEdgeCases(); } }; diff --git a/test/serializer.cpp b/test/serializer.cpp index 19573c03e..493c81d22 100644 --- a/test/serializer.cpp +++ b/test/serializer.cpp @@ -273,6 +273,8 @@ class serializer_test check("-999"); check("-99"); check("-9"); + check("-0"); + check("-0.0"); check( "0"); check( "9"); check( "99"); @@ -461,6 +463,21 @@ class serializer_test } } + void + testNumberRoundTrips() + { + BOOST_TEST(std::signbit(parse("-0.0").as_double())); + BOOST_TEST(to_string(value(-0.0)) == "-0E0"); + + //BOOST_TEST(parse("-0.0").as_double() == -0); + //BOOST_TEST(parse("-0").as_int64() == 0); + //BOOST_TEST(to_string(parse("0.0")) == "0"); + //BOOST_TEST(to_string(parse("-0.0")) == "-0.0"); + + // VFALCO Peter is unsure what this should do + //BOOST_TEST(to_string(parse("-0")) == "-0"); + } + void run() { @@ -472,6 +489,7 @@ class serializer_test testScalar(); testVectors(); testOstream(); + testNumberRoundTrips(); } }; From ad109b5dc21a2dc1caf35e1593393a22ed6ac761 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Thu, 9 Jan 2020 11:51:01 -0800 Subject: [PATCH 03/10] [FOLD] --- include/boost/json/detail/impl/number.ipp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/boost/json/detail/impl/number.ipp b/include/boost/json/detail/impl/number.ipp index 53cecbe4d..c3acaca5c 100644 --- a/include/boost/json/detail/impl/number.ipp +++ b/include/boost/json/detail/impl/number.ipp @@ -300,9 +300,9 @@ loop: st_ = state::mantd; goto loop; } - m = 10 * m + d; - ++dig_; ++p; + ++dig_; + m = 10 * m + d; continue; } if(*p == '.') @@ -332,6 +332,8 @@ loop: } // *[0..9] (double) + // Accumulate mantissa digits to the left + // of the decimal point, beyond double precision. case state::mantd: { BOOST_ASSERT( From c5a59fa0b88ee23427956a63c7381a415d50f733 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Thu, 9 Jan 2020 12:23:07 -0800 Subject: [PATCH 04/10] [FOLD] before big change --- include/boost/json/detail/impl/number.ipp | 52 +++++++++++------------ include/boost/json/detail/number.hpp | 5 ++- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/include/boost/json/detail/impl/number.ipp b/include/boost/json/detail/impl/number.ipp index c3acaca5c..bf73958e8 100644 --- a/include/boost/json/detail/impl/number.ipp +++ b/include/boost/json/detail/impl/number.ipp @@ -235,13 +235,8 @@ loop: if( m > 1844674407370955161 || ( m == 1844674407370955161 && d > 5)) { - // VFALCO Conversion to double may - // require intelligent rounding - ++p; - n_.d = static_cast(m) * 10; - n_.kind = kind::double_; - st_ = state::mantd; - goto loop; + n_.u = m; + goto enter_mantd; } ++p; ++dig_; @@ -292,13 +287,8 @@ loop: if( m > 922337203685477580 || ( m == 922337203685477580 && d > 8)) { - // VFALCO Conversion to double may - // require intelligent rounding. - // Need to do the right thing for neg_ == true - n_.d = static_cast(m); - n_.kind = kind::double_; - st_ = state::mantd; - goto loop; + n_.u = m; + goto enter_mantd; } ++p; ++dig_; @@ -331,40 +321,49 @@ loop: break; } + enter_mantd: + BOOST_ASSERT(off_ == 0); + ++p; + off_ = 1; + st_ = state::mantd; + n_.kind = kind::double_; + // VFALCO Conversion to double may + // require intelligent rounding. + // Need to do the right thing for neg_ == true + n_.d = static_cast(n_.u); + BOOST_FALLTHROUGH; + // *[0..9] (double) - // Accumulate mantissa digits to the left - // of the decimal point, beyond double precision. case state::mantd: { + // make sure we are past the + // limit of double precision. + BOOST_ASSERT(dig_ >= 18); BOOST_ASSERT( n_.kind == kind::double_); - auto d = n_.d; while(p < p1) { if(*p == '.') { ++p; - n_.d = d; st_ = state::fracd; goto loop; } if(*p == 'e' || *p == 'E') { ++p; - n_.d = d; st_ = state::exp1; goto loop; } - if(static_cast( - *p - '0') > 9) + unsigned char const d = *p - '0'; + if(d >= 10) { finish(ec); goto finish; } ++p; - d = d * 10; + ++off_; } - n_.d = d; break; } @@ -385,15 +384,16 @@ loop: } n_.kind = kind::double_; st_ = state::frac2; + // don't consume *p here BOOST_FALLTHROUGH; } - // zero or more [0..9] + // *[0..9] case state::frac2: { BOOST_ASSERT( n_.kind == kind::double_); - if(p < p1) + if(p < p1) // VFALCO see if the compiler can do this for us { auto m = n_.u; do @@ -431,7 +431,7 @@ loop: break; } - // zero or more [0..9] (double) + // *[0..9] (double) case state::fracd: { BOOST_ASSERT( diff --git a/include/boost/json/detail/number.hpp b/include/boost/json/detail/number.hpp index 856ebffea..8301e3877 100644 --- a/include/boost/json/detail/number.hpp +++ b/include/boost/json/detail/number.hpp @@ -46,9 +46,10 @@ class number_parser }; number n_; - short exp_; + short exp_; // exponent string as integer short dig_; // significant digits in mantissa - short off_; + short off_; // mantissa's contribution to exponent + short left_; // number of digits to the left of the decimal bool neg_; bool eneg_; state st_; From 050d1bff408f0e1df9fcbec52d1352c201d803f3 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Thu, 9 Jan 2020 15:09:19 -0800 Subject: [PATCH 05/10] [FOLD] rewriting --- include/boost/json/detail/impl/number.ipp | 345 ++++++++++------------ include/boost/json/detail/number.hpp | 7 +- test/_detail_number.cpp | 78 ++++- 3 files changed, 235 insertions(+), 195 deletions(-) diff --git a/include/boost/json/detail/impl/number.ipp b/include/boost/json/detail/impl/number.ipp index bf73958e8..9528c26d2 100644 --- a/include/boost/json/detail/impl/number.ipp +++ b/include/boost/json/detail/impl/number.ipp @@ -110,6 +110,8 @@ maybe_init(char ch) noexcept exp_ = 0; dig_ = 0; off_ = 0; + pos_ = -1; + sig_ = 0; neg_ = true; st_ = state::init0; return true; @@ -120,6 +122,8 @@ maybe_init(char ch) noexcept n_.u = d; exp_ = 0; off_ = 0; + pos_ = -1; + sig_ = 0; neg_ = false; n_.kind = kind::int64; if(ch == '0') @@ -166,6 +170,7 @@ loop: // [0,1..9] case state::init0: { + // got minus sign BOOST_ASSERT(neg_); BOOST_ASSERT( n_.kind == kind::int64); @@ -191,6 +196,8 @@ loop: // [.eE] case state::init1: + { + // got leading 0 BOOST_ASSERT( n_.kind == kind::int64); if(p >= p1) @@ -198,21 +205,61 @@ loop: if(*p == 'e' || *p == 'E') { ++p; - n_.d = 0; n_.kind = kind::double_; st_ = state::exp1; goto loop; } if(*p == '.') { + BOOST_ASSERT(pos_ < 0); + BOOST_ASSERT(dig_ == 0); ++p; - st_ = state::frac1; + pos_ = 0; + st_ = state::zeroes; + n_.kind = kind::double_; goto loop; } - // just a zero - n_.u = 0; + unsigned char const d = *p - '0'; + if(d < 10) + { + ec = error::expected_fraction; + goto finish; + } + // reached end of number st_ = state::end; goto finish; + } + + //----------------------------------- + + // *[0] + case state::zeroes: + { + BOOST_ASSERT(pos_ == 0); + while(p < p1) + { + unsigned char const d = *p - '0'; + if(d == 0) + { + ++p; + ++dig_; + continue; + } + if(d < 10 || + *p == 'e' || *p == 'E') + { + if(neg_) + st_ = state::mantn; + else + st_ = state::mant; + goto loop; + } + // reached end of number + st_ = state::end; + goto finish; + } + break; + } //----------------------------------- @@ -220,9 +267,6 @@ loop: case state::mant: { BOOST_ASSERT(! neg_); - BOOST_ASSERT(dig_ > 0); - BOOST_ASSERT( - n_.kind == kind::int64); if(p < p1) // VFALCO see if the compiler can do this for us { auto m = n_.u; @@ -239,21 +283,24 @@ loop: goto enter_mantd; } ++p; + // VFALCO Check dig_ for overflow + // Could use an implementation-defined limit + // which is lower than USHRT_MAX ++dig_; m = 10 * m + d; continue; } - if(*p == '.') + if(*p == '.' && pos_ < 0) { ++p; - n_.u = m; - st_ = state::frac1; - goto loop; + pos_ = dig_; + n_.kind = kind::double_; + continue; } if(*p == 'e' || *p == 'E') { ++p; - n_.d = static_cast(m); + n_.u = m; n_.kind = kind::double_; st_ = state::exp1; goto loop; @@ -273,8 +320,6 @@ loop: case state::mantn: { BOOST_ASSERT(neg_); - BOOST_ASSERT(dig_ > 0); - BOOST_ASSERT(n_.kind == kind::int64); if(p < p1) // VFALCO see if the compiler can do this for us { auto m = n_.u; @@ -291,21 +336,24 @@ loop: goto enter_mantd; } ++p; + // VFALCO Check dig_ for overflow + // Could use an implementation-defined limit + // which is lower than USHRT_MAX ++dig_; m = 10 * m + d; continue; } - if(*p == '.') + if(*p == '.' && pos_ < 0) { ++p; - n_.u = m; - st_ = state::frac1; - goto loop; + pos_ = dig_; + n_.kind = kind::double_; + continue; } if(*p == 'e' || *p == 'E') { ++p; - n_.d = static_cast(m); + n_.u = m; n_.kind = kind::double_; st_ = state::exp1; goto loop; @@ -322,32 +370,29 @@ loop: } enter_mantd: + // make sure we are past the + // limit of double precision. + BOOST_ASSERT(dig_ >= 18); BOOST_ASSERT(off_ == 0); ++p; - off_ = 1; + // VFALCO Check dig_ for overflow + // Could use an implementation-defined limit + // which is lower than USHRT_MAX + sig_ = dig_++; st_ = state::mantd; n_.kind = kind::double_; - // VFALCO Conversion to double may - // require intelligent rounding. - // Need to do the right thing for neg_ == true - n_.d = static_cast(n_.u); BOOST_FALLTHROUGH; // *[0..9] (double) case state::mantd: { - // make sure we are past the - // limit of double precision. - BOOST_ASSERT(dig_ >= 18); - BOOST_ASSERT( - n_.kind == kind::double_); while(p < p1) { - if(*p == '.') + if(*p == '.' && pos_ < 0) { ++p; - st_ = state::fracd; - goto loop; + pos_ = dig_; + continue; } if(*p == 'e' || *p == 'E') { @@ -358,122 +403,22 @@ loop: unsigned char const d = *p - '0'; if(d >= 10) { + // reached end of number finish(ec); goto finish; } ++p; - ++off_; - } - break; - } - - //----------------------------------- - - // [0..9] - case state::frac1: - { - BOOST_ASSERT( - n_.kind == kind::int64); - if(p >= p1) - break; - unsigned char const d = *p - '0'; - if(d >= 10) - { - ec = error::expected_fraction; - goto finish; - } - n_.kind = kind::double_; - st_ = state::frac2; - // don't consume *p here - BOOST_FALLTHROUGH; - } - - // *[0..9] - case state::frac2: - { - BOOST_ASSERT( - n_.kind == kind::double_); - if(p < p1) // VFALCO see if the compiler can do this for us - { - auto m = n_.u; - do - { - unsigned char const d = *p - '0'; - if(d < 10) - { - if(m > 9007199254740991) // (2^53-1) - { - ++p; - n_.d = static_cast(m); - st_ = state::fracd; - goto loop; - } - ++p; - m = 10 * m + d; - --off_; - if(m != 0) - ++dig_; - continue; - } - if(*p != 'e' && *p != 'E') - { - n_.u = m; - finish(ec); - goto finish; - } - ++p; - st_ = state::exp1; - goto loop; - } - while(p < p1); - n_.u = m; - } - break; - } - - // *[0..9] (double) - case state::fracd: - { - BOOST_ASSERT( - n_.kind == kind::double_); - if(p < p1) - { - auto m = n_.d; - do - { - unsigned char const d = *p - '0'; - if(d < 10) - { - if(dig_ < 17) - { - m = 10 * m + d; - --off_; - if(m > 0) - ++dig_; - } - ++p; - continue; - } - if(*p != 'e' && *p != 'E') - { - n_.d = m; - finish(ec); - goto finish; - } - ++p; - n_.d = m; - st_ = state::exp1; - goto loop; - } - while(p < p1); - n_.d = m; + // VFALCO Check dig_ for overflow + // Could use an implementation-defined limit + // which is lower than USHRT_MAX + ++dig_; } break; } //----------------------------------- - // + or - + // [-+,0..9] case state::exp1: { BOOST_ASSERT( @@ -501,7 +446,7 @@ loop: if(p >= p1) break; unsigned char const d = *p - '0'; - if(d > 9) + if(d >= 10) { ec = error::expected_exponent; goto finish; @@ -517,7 +462,8 @@ loop: { if(p < p1) { - auto const lim = 308 - off_; + // VFALCO FIX + auto const lim = 700;//308 - off_; auto e = exp_; while(p < p1) { @@ -586,70 +532,98 @@ finish( break; case state::init1: + BOOST_ASSERT(n_.u == 0); BOOST_ASSERT( n_.kind == kind::int64); - BOOST_ASSERT(n_.i == 0); //ec = {}; st_ = state::end; break; - case state::mant: - BOOST_ASSERT(! neg_); - BOOST_ASSERT(dig_ > 0); - BOOST_ASSERT( - n_.kind == kind::int64); - //ec = {}; - if(n_.u <= INT64_MAX) - n_.i = static_cast< - int64_t>(n_.u); + case state::zeroes: + BOOST_ASSERT(n_.u == 0); + if(pos_ == dig_) + { + ec = error::expected_fraction; + break; + } + if(pos_ >= 0) + { + BOOST_ASSERT( + n_.kind == kind::double_); + if(neg_) + n_.d = -0.0; + else + n_.d = 0; + } else - n_.kind = kind::uint64; + { + BOOST_ASSERT( + n_.kind == kind::int64); + n_.i = 0; + } st_ = state::end; break; + case state::mant: + BOOST_ASSERT(! neg_); + BOOST_FALLTHROUGH; case state::mantn: - BOOST_ASSERT(neg_); BOOST_ASSERT(dig_ > 0); - BOOST_ASSERT( - n_.kind == kind::int64); - //ec = {}; - n_.i = static_cast< - int64_t>(~n_.u+1); + if(pos_ == dig_) + { + ec = error::expected_fraction; + break; + } + if(n_.kind == kind::double_) + { + if( pos_ < 0) + pos_ = dig_; + if(neg_) + n_.d = (-static_cast< + double>(n_.u)) * + pow10(pos_ - dig_); + else + n_.d = static_cast< + double>(n_.u) * + pow10(pos_ - dig_); + } + else + { + BOOST_ASSERT( + n_.kind == kind::int64); + if(st_ == state::mantn) + { + n_.i = static_cast< + int64_t>(~n_.u+1); + } + else + { + if( n_.u <= INT64_MAX) + n_.i = static_cast< + int64_t>(n_.u); + else + n_.kind = kind::uint64; + } + } st_ = state::end; break; case state::mantd: - //ec = {}; - if(neg_) - n_.d = -n_.d; - exp_ += off_; - n_.d *= pow10(exp_); - st_ = state::end; - break; - - case state::frac1: - ec = error::expected_fraction; - break; - - case state::frac2: BOOST_ASSERT( n_.kind == kind::double_); - //ec = {}; - exp_ += off_; - n_.d = n_.u * pow10(exp_); + if(pos_ == dig_) + { + ec = error::expected_fraction; + break; + } + if( pos_ < 0) + pos_ = dig_; + n_.d = static_cast(n_.u) * + pow10(pos_ - sig_); if(neg_) n_.d = -n_.d; - st_ = state::end; - break; - - case state::fracd: - BOOST_ASSERT( - n_.kind == kind::double_); - //ec = {}; exp_ += off_; - n_.d = n_.d * pow10(exp_); - if(neg_) - n_.d = -n_.d; + n_.d *= pow10(exp_); st_ = state::end; break; @@ -662,18 +636,17 @@ finish( break; case state::exp3: - //ec = {}; exp_ += off_; if(eneg_) exp_ = -exp_; - n_.d = n_.d * pow10(exp_); + n_.d = static_cast(n_.u) * + pow10(exp_ - dig_); if(neg_) n_.d = -n_.d; st_ = state::end; break; case state::end: - //ec = {}; break; } } diff --git a/include/boost/json/detail/number.hpp b/include/boost/json/detail/number.hpp index 8301e3877..36c208e73 100644 --- a/include/boost/json/detail/number.hpp +++ b/include/boost/json/detail/number.hpp @@ -39,17 +39,18 @@ class number_parser enum class state { init, init0, init1, + zeroes, mant, mantn, mantd, - frac1, frac2, fracd, exp1, exp2, exp3, end }; number n_; short exp_; // exponent string as integer - short dig_; // significant digits in mantissa + short dig_; // digits in mantissa short off_; // mantissa's contribution to exponent - short left_; // number of digits to the left of the decimal + short pos_; // position of decimal point + short sig_; // significant digits in mantissa bool neg_; bool eneg_; state st_; diff --git a/test/_detail_number.cpp b/test/_detail_number.cpp index e9d46f6d0..6c40096ac 100644 --- a/test/_detail_number.cpp +++ b/test/_detail_number.cpp @@ -250,13 +250,9 @@ class number_test { check_bad(""); check_bad("x"); - check_bad("00"); check_bad("e"); check_bad("1ex"); check_bad("-"); - check_bad("00"); - check_bad("00."); - check_bad("00.0"); check_bad("1a"); check_bad("."); check_bad("1."); @@ -269,6 +265,16 @@ class number_test check_bad("0.0e"); check_bad("-e"); check_bad("-x"); + + // leading 0 must be followed by [.eE] or nothing + check_bad( "00"); + check_bad( "01"); + check_bad( "00."); + check_bad( "00.0"); + check_bad("-00"); + check_bad("-01"); + check_bad("-00."); + check_bad("-00.0"); } //------------------------------------------------------ @@ -310,7 +316,7 @@ class number_test // Verify that f converts to the // same double produced by `strtod`. - // Requires `s` does not fit in an integral type. + // Requires `s` is not represented by an integral type. template void fcheck(std::string const& s, F const& f) @@ -527,16 +533,35 @@ class number_test BOOST_TEST(parse("-0.0") == double_num(-0.0)); BOOST_TEST(parse("-0E0") == double_num(-0.0)); BOOST_TEST(parse("-0") == int64_num(0)); + + BOOST_TEST(parse("0") == int64_num(0)); + BOOST_TEST(parse("0.010") == double_num(0.01)); + BOOST_TEST(parse("-0.010") == double_num(-0.01)); + BOOST_TEST(parse("1.010") == double_num(1.01)); + BOOST_TEST(parse("-1.010") == double_num(-1.01)); } void run() { + fcheck( + "0." + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" // 500 zeroes + "1e600", f_boost{} ); + testEdgeCases(); testMembers(); testIntegers(); testBad(); testDoubles(); - testEdgeCases(); } }; @@ -545,3 +570,44 @@ TEST_SUITE(number_test, "boost.json.detail.number"); } // detail } // json } // boost + +#if 0 + +(for positive) +A. accumulate digits into unsigned u + if(got('.')) + if( have_dot ) + return error; + dot_pos = pos + else if(u > UINT64_MAX) + goto state C + else + ++dig_; + accumulate digit + +(for negative) +B. accumulate digits into unsigned u + if(got('.')) + if( have_dot ) + return error; + dot_pos = pos + else if(u > abs(INT64_MIN)) + goto state C + else + ++dig_; + accumulate digit + +C. accumulate exponent offset + if(got('e', 'E', '-', '+') + ... + else if(got('.')) + if( have_dot ) + return error; + dot_pos = pos + else + if( have_dot ) + // do nothing + else + ++dig_; + +#endif From 5c055523a2baa9ff289f70bf58dd22010dd5e14e Mon Sep 17 00:00:00 2001 From: Richard Hodges Date: Fri, 10 Jan 2020 10:51:13 +0100 Subject: [PATCH 06/10] correct number parser: Fixes a number of issues. Number parsing tests now pass, including edge cases. Remaining: examine options for str to double rounding corrections when number is outside the range 1e-22->1e22 ref: David Gray's seminal work https://ampl.com/netlib/fp/dtoa.c --- CMakeLists.txt | 7 + include/boost/json/detail/impl/number.ipp | 101 +++++- include/boost/json/detail/number.hpp | 2 +- test/_detail_number.cpp | 396 ++++++++++++++-------- test/basic_parser.cpp | 4 +- 5 files changed, 343 insertions(+), 167 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 70201f51b..767a8dc33 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,4 +87,11 @@ else() PUBLIC Boost::system ) + option(BUILD_TESTING "Build the tests" ON) + if (BUILD_TESTING) + add_subdirectory(bench) + add_subdirectory(example) + add_subdirectory(test) + endif() + endif() diff --git a/include/boost/json/detail/impl/number.ipp b/include/boost/json/detail/impl/number.ipp index 9528c26d2..b640ce759 100644 --- a/include/boost/json/detail/impl/number.ipp +++ b/include/boost/json/detail/impl/number.ipp @@ -92,9 +92,16 @@ pow10(int exp) noexcept 1e+300, 1e+301, 1e+302, 1e+303, 1e+304, 1e+305, 1e+306, 1e+307, 1e+308 }; - exp += 308; - BOOST_ASSERT(exp >= 0 && exp < 618); - return tab[exp]; + if (exp < -308 || exp > 308) + { + return std::pow(10.0, exp); + } + else + { + exp += 308; + BOOST_ASSERT(exp >= 0 && exp < 618); + return tab[exp]; + } } // return true on '-' '0' '1'..'9'. @@ -109,7 +116,6 @@ maybe_init(char ch) noexcept n_.kind = kind::int64; exp_ = 0; dig_ = 0; - off_ = 0; pos_ = -1; sig_ = 0; neg_ = true; @@ -121,7 +127,6 @@ maybe_init(char ch) noexcept return false; n_.u = d; exp_ = 0; - off_ = 0; pos_ = -1; sig_ = 0; neg_ = false; @@ -134,6 +139,7 @@ maybe_init(char ch) noexcept else { dig_ = 1; + sig_ = 1; st_ = state::mant; } return true; @@ -190,6 +196,7 @@ loop: } n_.u = d; dig_ = 1; + sig_ = 1; st_ = state::mantn; goto loop; } @@ -215,7 +222,7 @@ loop: BOOST_ASSERT(dig_ == 0); ++p; pos_ = 0; - st_ = state::zeroes; + st_ = state::mantf; n_.kind = kind::double_; goto loop; } @@ -230,7 +237,34 @@ loop: goto finish; } - //----------------------------------- + // 1*digit + case state::mantf: + { + if (p >= p1) + break; + unsigned char const d = *p - '0'; + if(d < 10) + { + if ( d == 0 && n_.u == 0) + { + st_ = state::zeroes; + ++p; + ++dig_; + } + else if(neg_) + { + st_ = state::mantn; + } + else + { + st_ = state::mant; + } + goto loop; + } + + ec = error::expected_fraction; + goto finish; + } // *[0] case state::zeroes: @@ -287,6 +321,7 @@ loop: // Could use an implementation-defined limit // which is lower than USHRT_MAX ++dig_; + ++sig_; m = 10 * m + d; continue; } @@ -295,10 +330,15 @@ loop: ++p; pos_ = dig_; n_.kind = kind::double_; - continue; + st_ = state::mantf; + n_.u = m; + goto loop; } if(*p == 'e' || *p == 'E') { + // 'E' implies '.' if not already encountered + if (pos_ < 0) + pos_ = dig_; ++p; n_.u = m; n_.kind = kind::double_; @@ -339,6 +379,7 @@ loop: // VFALCO Check dig_ for overflow // Could use an implementation-defined limit // which is lower than USHRT_MAX + ++sig_; ++dig_; m = 10 * m + d; continue; @@ -348,10 +389,15 @@ loop: ++p; pos_ = dig_; n_.kind = kind::double_; - continue; + n_.u = m; + st_ = state::mantf; + goto loop; } if(*p == 'e' || *p == 'E') { + // 'E' implies '.' if not already encountered + if (pos_ < 0) + pos_ = dig_; ++p; n_.u = m; n_.kind = kind::double_; @@ -372,13 +418,13 @@ loop: enter_mantd: // make sure we are past the // limit of double precision. + BOOST_ASSERT(sig_ == dig_); BOOST_ASSERT(dig_ >= 18); - BOOST_ASSERT(off_ == 0); ++p; // VFALCO Check dig_ for overflow // Could use an implementation-defined limit // which is lower than USHRT_MAX - sig_ = dig_++; + ++dig_; st_ = state::mantd; n_.kind = kind::double_; BOOST_FALLTHROUGH; @@ -396,6 +442,9 @@ loop: } if(*p == 'e' || *p == 'E') { + // 'E' implies '.' if not already encountered + if (pos_ < 0) + pos_ = dig_; ++p; st_ = state::exp1; goto loop; @@ -564,6 +613,10 @@ finish( st_ = state::end; break; + case state::mantf: + ec = error::expected_fraction; + break; + case state::mant: BOOST_ASSERT(! neg_); BOOST_FALLTHROUGH; @@ -622,7 +675,6 @@ finish( pow10(pos_ - sig_); if(neg_) n_.d = -n_.d; - exp_ += off_; n_.d *= pow10(exp_); st_ = state::end; break; @@ -636,16 +688,31 @@ finish( break; case state::exp3: - exp_ += off_; - if(eneg_) + { + if (eneg_) exp_ = -exp_; + + if (pos_ == 0) + { + // |mantissa| < 1.0 + auto start = dig_ - sig_; + auto exponent_adjust = -start - 1; + exp_ += exponent_adjust; + } + else + { + // mantissa >= 1.0 + auto exponent_adjust = -sig_ + pos_; + exp_ += exponent_adjust; + } + n_.d = static_cast(n_.u) * - pow10(exp_ - dig_); - if(neg_) + pow10(exp_); + if (neg_) n_.d = -n_.d; st_ = state::end; break; - + } case state::end: break; } diff --git a/include/boost/json/detail/number.hpp b/include/boost/json/detail/number.hpp index 36c208e73..8f6efb07c 100644 --- a/include/boost/json/detail/number.hpp +++ b/include/boost/json/detail/number.hpp @@ -39,6 +39,7 @@ class number_parser enum class state { init, init0, init1, + mantf, zeroes, mant, mantn, mantd, exp1, exp2, exp3, @@ -48,7 +49,6 @@ class number_parser number n_; short exp_; // exponent string as integer short dig_; // digits in mantissa - short off_; // mantissa's contribution to exponent short pos_; // position of decimal point short sig_; // significant digits in mantissa bool neg_; diff --git a/test/_detail_number.cpp b/test/_detail_number.cpp index 6c40096ac..2ebcd0027 100644 --- a/test/_detail_number.cpp +++ b/test/_detail_number.cpp @@ -16,6 +16,8 @@ #include "test_suite.hpp" +#define ACCURATE_CONVERSION 0 + namespace boost { namespace json { namespace detail { @@ -36,12 +38,111 @@ operator==( default: break; } - return + return std::signbit(lhs.d) == std::signbit(rhs.d) && lhs.d == rhs.d; } +struct double_diagnoser +{ + + static void emit_hex(std::ostream& os, double d) + { + std::uint64_t binary; + static_assert(sizeof(binary) == sizeof(d), ""); + + std::memcpy(&binary, &d, sizeof(d)); + auto oldflags = os.flags(); + + try + { + os << std::hex << std::setw(16) << std::setfill('0') << binary; + os.flags(oldflags); + } + catch(...) + { + os.flags(oldflags); + } + } + static void emit_pow2(std::ostream& os, double d) + { + auto oldflags = os.flags(); + try + { + int exponent = 0; + auto mantissa = std::frexp(d, &exponent); + os << std::fixed << mantissa; + os << " *2^ "; + os << exponent; + os.flags(oldflags); + } + catch(...) + { + os.flags(oldflags); + throw; + } + } + + static void emit_scientific(std::ostream& os, double d) + { + auto oldflags = os.flags(); + try + { + constexpr auto digits = std::numeric_limits::max_digits10; + os << std::setprecision(digits) << d; + os.flags(oldflags); + } + catch(...) + { + os.flags(oldflags); + throw; + } + } + + friend + std::ostream& + operator<<(std::ostream& os, double_diagnoser diag) + { + emit_scientific(os, diag.d); + os << "(0x"; + emit_hex(os, diag.d); + os << ") "; + emit_pow2(os, diag.d); + return os; + } + + double d; +}; + +auto diagnose(double d) -> double_diagnoser +{ + return double_diagnoser { d }; +} + +bool +are_close( + double x, + double y) +{ + std::uint64_t bx, by; + std::memcpy(&bx, &x, sizeof(x)); + std::memcpy(&by, &y, sizeof(y)); + + auto diff = bx - by; + switch (diff) + { + case 0: + case 1: + case 0xffffffffffffffff: + return true; + default: + break; + } + return false; +} + + class number_test { public: @@ -239,7 +340,7 @@ class number_test check_int64( "99999999999999999", 99999999999999999); check_int64( "999999999999999999", 999999999999999999); check_int64( "9223372036854775807", INT64_MAX); - + check_uint64( "9223372036854775808", 9223372036854775808ULL); check_uint64( "9999999999999999999", 9999999999999999999ULL); check_uint64( "18446744073709551615", UINT64_MAX); @@ -255,16 +356,22 @@ class number_test check_bad("-"); check_bad("1a"); check_bad("."); + check_bad("-."); check_bad("1."); + check_bad("-1."); check_bad("1.x"); check_bad("1+"); + check_bad("1-"); check_bad("0.0+"); check_bad("0.0e+"); check_bad("0.0e-"); check_bad("0.0e0-"); check_bad("0.0e"); + check_bad("0.e1"); check_bad("-e"); check_bad("-x"); + check_bad("2.e+3"); + check_bad("-2.e+3"); // leading 0 must be followed by [.eE] or nothing check_bad( "00"); @@ -326,23 +433,30 @@ class number_test std::strtod(s.c_str(), &str_end); BOOST_TEST(str_end == &s.back() + 1); double const got = f(s); - if(! BOOST_TEST(got == need)) + auto same = got == need; + +#if !ACCURATE_CONVERSION + auto close = same ? true : are_close(got, need); + if(! BOOST_TEST(close)) { - std::uint64_t uneed; - std::uint64_t ugot; - std::memcpy(&uneed, &need, sizeof(need)); - std::memcpy(&ugot, &got, sizeof(got)); - log << - std::hex << - std::setprecision( - std::numeric_limits< - double>::max_digits10) << + log << "not close : " << f.name() << "\n" "string: " << s << "\n" - "need : " << need << " (0x" << uneed << ")\n" - "got : " << got << " (0x" << ugot << ")" << + "need : " << diagnose(need) << "\n" + "got : " << diagnose(got) << std::endl; } +#else + if (!BOOST_TEST(same)) + { + log << "close but not close enough : " << + f.name() << "\n" + "string: " << s << "\n" + "need : " << diagnose(need) << "\n" + "got : " << diagnose(got) << + std::endl; + } +#endif } template @@ -350,135 +464,134 @@ class number_test check_numbers(F const& f) { auto const fc = - [&f, this](std::string const& s) + [&](std::string const& s) { fcheck(s, f); }; - fc( "-999999999999999999999" ); - fc( "-100000000000000000009"); - fc( "-10000000000000000000" ); - fc( "-9223372036854775809" ); - - fc( "18446744073709551616" ); - fc( "99999999999999999999" ); - fc( "999999999999999999999" ); - fc( "1000000000000000000000" ); - fc( "9999999999999999999999" ); - fc( "99999999999999999999999" ); - - fc("-0.9999999999999999999999" ); - fc("-0.9999999999999999" ); - fc("-0.9007199254740991" ); - fc("-0.999999999999999" ); - fc("-0.99999999999999" ); - fc("-0.9999999999999" ); - fc("-0.999999999999" ); - fc("-0.99999999999" ); - fc("-0.9999999999" ); - fc("-0.999999999" ); - fc("-0.99999999" ); - fc("-0.9999999" ); - fc("-0.999999" ); - fc("-0.99999" ); - fc("-0.9999" ); - fc("-0.8125" ); - fc("-0.999" ); - fc("-0.99" ); - fc("-1.0" ); - fc("-0.9" ); - fc("-0.0" ); - fc( "0.0" ); - fc( "0.9" ); - fc( "0.99" ); - fc( "0.999" ); - fc( "0.8125" ); - fc( "0.9999" ); - fc( "0.99999" ); - fc( "0.999999" ); - fc( "0.9999999" ); - fc( "0.99999999" ); - fc( "0.999999999" ); - fc( "0.9999999999" ); - fc( "0.99999999999" ); - fc( "0.999999999999" ); - fc( "0.9999999999999" ); - fc( "0.99999999999999" ); - fc( "0.999999999999999" ); - fc( "0.9007199254740991" ); - fc( "0.9999999999999999" ); - fc( "0.9999999999999999999999" ); - fc( "0.999999999999999999999999999" ); - - fc("-1e308" ); - fc("-1e-308" ); - fc("-9999e300" ); - fc("-999e100" ); - fc("-99e10" ); - fc("-9e1" ); - fc( "9e1" ); - fc( "99e10" ); - fc( "999e100" ); - fc( "9999e300" ); - fc( "999999999999999999.0" ); - fc( "999999999999999999999.0" ); - fc( "999999999999999999999e5" ); - fc( "999999999999999999999.0e5" ); - - fc( "0.00000000000000001" ); - - fc("-1e-1" ); - fc("-1e0" ); - fc("-1e1" ); - fc( "0e0" ); - fc( "1e0" ); - fc( "1e10" ); - - fc( "0." - "00000000000000000000000000000000000000000000000000" // 50 zeroes - "1e50" ); - fc( "-0." - "00000000000000000000000000000000000000000000000000" // 50 zeroes - "1e50" ); + fc("-999999999999999999999"); + fc("-100000000000000000009"); + fc("-10000000000000000000"); + fc("-9223372036854775809"); + + fc("18446744073709551616"); + fc("99999999999999999999"); + fc("999999999999999999999"); + fc("1000000000000000000000"); + fc("9999999999999999999999"); + fc("99999999999999999999999"); + + fc("-0.9999999999999999999999"); + fc("-0.9999999999999999"); + fc("-0.9007199254740991"); + fc("-0.999999999999999"); + fc("-0.99999999999999"); + fc("-0.9999999999999"); + fc("-0.999999999999"); + fc("-0.99999999999"); + fc("-0.9999999999"); + fc("-0.999999999"); + fc("-0.99999999"); + fc("-0.9999999"); + fc("-0.999999"); + fc("-0.99999"); + fc("-0.9999"); + fc("-0.8125"); + fc("-0.999"); + fc("-0.99"); + fc("-1.0"); + fc("-0.9"); + fc("-0.0"); + fc("0.0"); + fc("0.9"); + fc("0.99"); + fc("0.999"); + fc("0.8125"); + fc("0.9999"); + fc("0.99999"); + fc("0.999999"); + fc("0.9999999"); + fc("0.99999999"); + fc("0.999999999"); + fc("0.9999999999"); + fc("0.99999999999"); + fc("0.999999999999"); + fc("0.9999999999999"); + fc("0.99999999999999"); + fc("0.999999999999999"); + fc("0.9007199254740991"); + fc("0.9999999999999999"); + fc("0.9999999999999999999999"); + fc("0.999999999999999999999999999"); + + fc("-1e308"); + fc("-1e-308"); + fc("-9999e300"); + fc("-999e100"); + fc("-99e10"); + fc("-9e1"); + fc("9e1"); + fc("99e10"); + fc("999e100"); + fc("9999e300"); + fc("999999999999999999.0"); + fc("999999999999999999999.0"); + fc("999999999999999999999e5"); + fc("999999999999999999999.0e5"); + + fc("0.00000000000000001"); + + fc("-1e-1"); + fc("-1e0"); + fc("-1e1"); + fc("0e0"); + fc("1e0"); + fc("1e10"); fc("0." - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" // 500 zeroes - "1e600" ); + "00000000000000000000000000000000000000000000000000" // 50 zeroes + "1e50"); fc("-0." - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" // 500 zeroes - "1e600" ); - - fc( "0e" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" // 500 zeroes - ); + "00000000000000000000000000000000000000000000000000" // 50 zeroes + "1e50"); + fc("0." + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" // 500 zeroes + "1e600"); + fc("-0." + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" // 500 zeroes + "1e600"); + + fc("0e" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000" // 500 zeroes + ); } void @@ -544,19 +657,6 @@ class number_test void run() { - fcheck( - "0." - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000" // 500 zeroes - "1e600", f_boost{} ); testEdgeCases(); testMembers(); testIntegers(); diff --git a/test/basic_parser.cpp b/test/basic_parser.cpp index 650ca04be..b29f15a6d 100644 --- a/test/basic_parser.cpp +++ b/test/basic_parser.cpp @@ -481,7 +481,10 @@ class basic_parser_test s.data(), s.size(), ec); if(! BOOST_TEST(! ec)) + { + ::test_suite::log_type() << " failed to parse: " << s; return; + } BOOST_TEST(is_done == p.is_done()); }; @@ -507,7 +510,6 @@ class basic_parser_test check("0 ", false); check("0x", true); check("0 x", true); - check("00", true); check("0.", false); check("0.0", false); check("0.0 ", false); From 253467059d89a262263375284c13e322a87db254 Mon Sep 17 00:00:00 2001 From: Richard Hodges Date: Tue, 14 Jan 2020 12:53:02 +0100 Subject: [PATCH 07/10] simplify E==. comment --- include/boost/json/detail/impl/number.ipp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/boost/json/detail/impl/number.ipp b/include/boost/json/detail/impl/number.ipp index b640ce759..c0cdce496 100644 --- a/include/boost/json/detail/impl/number.ipp +++ b/include/boost/json/detail/impl/number.ipp @@ -336,7 +336,7 @@ loop: } if(*p == 'e' || *p == 'E') { - // 'E' implies '.' if not already encountered + // treat 'E' as '.' if (pos_ < 0) pos_ = dig_; ++p; @@ -395,7 +395,7 @@ loop: } if(*p == 'e' || *p == 'E') { - // 'E' implies '.' if not already encountered + // treat 'E' as '.' if (pos_ < 0) pos_ = dig_; ++p; @@ -442,7 +442,7 @@ loop: } if(*p == 'e' || *p == 'E') { - // 'E' implies '.' if not already encountered + // treat 'E' as '.' if (pos_ < 0) pos_ = dig_; ++p; From cc92fc8b83e6093a4067274e4f44dce51ad10cf7 Mon Sep 17 00:00:00 2001 From: Richard Hodges Date: Tue, 14 Jan 2020 12:54:36 +0100 Subject: [PATCH 08/10] vinniefy mantissa comment --- include/boost/json/detail/impl/number.ipp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/boost/json/detail/impl/number.ipp b/include/boost/json/detail/impl/number.ipp index c0cdce496..fc9702e61 100644 --- a/include/boost/json/detail/impl/number.ipp +++ b/include/boost/json/detail/impl/number.ipp @@ -694,14 +694,14 @@ finish( if (pos_ == 0) { - // |mantissa| < 1.0 + // abs(mantissa) < 1 auto start = dig_ - sig_; auto exponent_adjust = -start - 1; exp_ += exponent_adjust; } else { - // mantissa >= 1.0 + // abs(mantissa) >= 1 auto exponent_adjust = -sig_ + pos_; exp_ += exponent_adjust; } From ceec33cd2b6c1fbd21d215f00a652233c331f62f Mon Sep 17 00:00:00 2001 From: Richard Hodges Date: Tue, 14 Jan 2020 13:02:17 +0100 Subject: [PATCH 09/10] remove temporary constexpr digits --- test/_detail_number.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/_detail_number.cpp b/test/_detail_number.cpp index 2ebcd0027..d7f76ffa5 100644 --- a/test/_detail_number.cpp +++ b/test/_detail_number.cpp @@ -89,8 +89,9 @@ struct double_diagnoser auto oldflags = os.flags(); try { - constexpr auto digits = std::numeric_limits::max_digits10; - os << std::setprecision(digits) << d; + os + << std::setprecision(std::numeric_limits::max_digits10) + << d; os.flags(oldflags); } catch(...) From a0a3dcb0063deb762b05b6ecd16098504865eb00 Mon Sep 17 00:00:00 2001 From: Richard Hodges Date: Tue, 14 Jan 2020 13:14:27 +0100 Subject: [PATCH 10/10] fix use of log in basic_parser test --- test/basic_parser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/basic_parser.cpp b/test/basic_parser.cpp index b29f15a6d..eb0c879a2 100644 --- a/test/basic_parser.cpp +++ b/test/basic_parser.cpp @@ -472,7 +472,7 @@ class basic_parser_test testParser() { auto const check = - []( string_view s, + [this]( string_view s, bool is_done) { fail_parser p; @@ -482,7 +482,7 @@ class basic_parser_test ec); if(! BOOST_TEST(! ec)) { - ::test_suite::log_type() << " failed to parse: " << s; + log << " failed to parse: " << s; return; } BOOST_TEST(is_done ==