From a7e4d496089b1c6e23deefc14e327bcad9cf964c Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 27 Sep 2024 18:24:42 -0400 Subject: [PATCH 1/3] allows you to benchmark against 1 chosen URL. --- benchmarks/wpt_bench.cpp | 73 +++++++++++++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 15 deletions(-) diff --git a/benchmarks/wpt_bench.cpp b/benchmarks/wpt_bench.cpp index 2b37db119..814e17db3 100644 --- a/benchmarks/wpt_bench.cpp +++ b/benchmarks/wpt_bench.cpp @@ -7,7 +7,11 @@ double url_examples_bytes{}; std::vector<std::pair<std::string, std::string>> url_examples; -size_t init_data(const char *source) { +enum { + ALL_URLS = -1, +}; + +size_t init_data(const char *source, int which_url) { ondemand::parser parser; std::vector<std::pair<std::string, std::string>> answer; @@ -16,19 +20,48 @@ size_t init_data(const char *source) { } padded_string json = padded_string::load(source); ondemand::document doc = parser.iterate(json); - for (auto element : doc.get_array()) { - if (element.type() == ondemand::json_type::object) { - std::string_view input; - if (element["input"].get_string(true).get(input) != simdjson::SUCCESS) { - printf("missing input.\n"); + if (which_url == ALL_URLS) { + for (auto element : doc.get_array()) { + if (element.type() == ondemand::json_type::object) { + std::string_view input; + if (element["input"].get_string(true).get(input) != simdjson::SUCCESS) { + printf("missing input.\n"); + continue; + } + std::string_view base; + if (element["base"].get_string(true).get(base) != simdjson::SUCCESS) { + // missing base is ok? 
+ } + url_examples.push_back({std::string(input), std::string(base)}); + url_examples_bytes += input.size() + base.size(); } - std::string_view base; - if (element["base"].get_string(true).get(base) != simdjson::SUCCESS) { + } + } else { + size_t count = 0; + for (auto element : doc.get_array()) { + if (element.type() == ondemand::json_type::object) { + std::string_view input; + if (element["input"].get_string(true).get(input) != simdjson::SUCCESS) { + printf("missing input.\n"); + continue; + } + std::string_view base; + if (element["base"].get_string(true).get(base) != simdjson::SUCCESS) { + // missing base is ok? + } + if (count++ == which_url) { + url_examples.push_back({std::string(input), std::string(base)}); + url_examples_bytes += input.size() + base.size(); + break; + } } - url_examples.push_back({std::string(input), std::string(base)}); - url_examples_bytes += input.size() + base.size(); + } + if (url_examples.size() == 0) { + printf("# There are %zu urls in the file, index is %d.\n", count, + which_url); } } + printf("# recovered %zu urls.\n", url_examples.size()); return url_examples.size(); } @@ -52,7 +85,7 @@ static void BasicBench_AdaURL(benchmark::State &state) { } auto url = ada::parse(url_strings.first, base_ptr); if (url) { - href_size += url->get_href().size(); + href_size = href_size + url->get_href().size(); } } } @@ -75,7 +108,7 @@ static void BasicBench_AdaURL(benchmark::State &state) { } auto url = ada::parse(url_strings.first, base_ptr); if (url) { - href_size += url->get_href().size(); + href_size = href_size + url->get_href().size(); } } std::atomic_thread_fence(std::memory_order_release); @@ -134,7 +167,7 @@ static void BasicBench_whatwg(benchmark::State &state) { } upa::url url; if (upa::success(url.parse(url_strings.first, base_ptr))) { - success++; + success = success + 1; } } } @@ -154,7 +187,7 @@ static void BasicBench_whatwg(benchmark::State &state) { } upa::url url; if (upa::success(url.parse(url_strings.first, base_ptr))) { - 
success++; + success = success + 1; } } std::atomic_thread_fence(std::memory_order_release); @@ -195,13 +228,23 @@ BENCHMARK(BasicBench_whatwg); #endif // ADA_url_whatwg_ENABLED int main(int argc, char **argv) { - if (argc == 1 || !init_data(argv[1])) { + int which_url = ALL_URLS; + if (argc > 3 && std::string_view(argv[2]) == "--select") { + which_url = std::atoi(argv[3]); + printf("# Selecting url %d.\n", which_url); + } + if (argc == 1 || !init_data(argv[1], which_url)) { std::cout << "pass the path to the file wpt/urltestdata.json as a parameter." << std::endl; std::cout << "E.g., './build/benchmarks/wpt_bench tests/wpt/urltestdata.json'" << std::endl; + std::cout << "You can also select a single URL by passing --select ." + << std::endl; + std::cout << "E.g., './build/benchmarks/wpt_bench " + "tests/wpt/urltestdata.json --select 0'" + << std::endl; return EXIT_SUCCESS; } #if defined(ADA_RUST_VERSION) From e7bc6735724867a45f9bed7295d5c1bc450fc6cc Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 27 Sep 2024 22:47:07 -0400 Subject: [PATCH 2/3] we need c++20 --- src/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6d68b6285..5a1a8d383 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,6 +8,7 @@ add_library(ada-source INTERFACE) target_sources(ada-source INTERFACE $/ada.cpp) target_link_libraries(ada-source INTERFACE ada-include-source) add_library(ada ada.cpp) +target_compile_features(ada PUBLIC cxx_std_20) target_include_directories(ada PRIVATE $ ) target_include_directories(ada PUBLIC "$") From d9aec26f7efd80b78025eaa408965d624aea5fed Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Sat, 28 Sep 2024 16:07:37 -0400 Subject: [PATCH 3/3] workaround for aarch64 --- .github/workflows/aarch64.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/aarch64.yml b/.github/workflows/aarch64.yml index a2751aed6..5aed54916 100644 --- 
a/.github/workflows/aarch64.yml +++ b/.github/workflows/aarch64.yml @@ -36,6 +36,6 @@ jobs: apt-get update -q -y apt-get install -y cmake make g++ ninja-build git run: | - cmake -B build + cmake -DCMAKE_CXX_STANDARD=20 -B build cmake --build build ctest --test-dir build