From cc3e866b9fd2ec817b24cc08ef86befed5e3265d Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Wed, 30 Sep 2020 13:47:21 -0500 Subject: [PATCH 01/28] Move and rename benchmark to it separate benchmark directory. --- {samples => benchmarks}/CMakeLists.txt | 6 +++--- samples/benchmark.cpp => benchmarks/concurrency.cpp | 0 samples/benchmark.cu => benchmarks/concurrency.cu | 0 3 files changed, 3 insertions(+), 3 deletions(-) rename {samples => benchmarks}/CMakeLists.txt (92%) rename samples/benchmark.cpp => benchmarks/concurrency.cpp (100%) rename samples/benchmark.cu => benchmarks/concurrency.cu (100%) diff --git a/samples/CMakeLists.txt b/benchmarks/CMakeLists.txt similarity index 92% rename from samples/CMakeLists.txt rename to benchmarks/CMakeLists.txt index 75fd8fab73..736f544810 100644 --- a/samples/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -48,11 +48,11 @@ include_directories(${CMAKE_SOURCE_DIR}/../include) find_package(Threads REQUIRED) find_package(OpenMP) -add_executable(benchmark_host benchmark.cpp) +add_executable(concurrency_host concurrency.cpp) -target_link_libraries(benchmark_host PUBLIC Threads::Threads) +target_link_libraries(concurrency_host PUBLIC Threads::Threads) if(OpenMP_CXX_FOUND) target_link_libraries(benchmark_host PUBLIC OpenMP::OpenMP_CXX) endif() -add_executable(benchmark_device benchmark.cu) +add_executable(concurrency_device concurrency.cu) diff --git a/samples/benchmark.cpp b/benchmarks/concurrency.cpp similarity index 100% rename from samples/benchmark.cpp rename to benchmarks/concurrency.cpp diff --git a/samples/benchmark.cu b/benchmarks/concurrency.cu similarity index 100% rename from samples/benchmark.cu rename to benchmarks/concurrency.cu From 25d155c507fe1bfe1f996b8ba2488cb356919f12 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Wed, 30 Sep 2020 13:53:45 -0500 Subject: [PATCH 02/28] Use CUDA as project language. --- benchmarks/CMakeLists.txt | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 736f544810..3a3ba73bef 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -1,15 +1,5 @@ -cmake_minimum_required(VERSION 3.8) -project(libcudacxx-samples CXX) - -if (NOT "${CMAKE_CUDA_HOST_COMPILER}" STREQUAL "") - unset(CMAKE_CUDA_HOST_COMPILER CACHE) - message(FATAL_ERROR "Thrust tests and examples require the C++ compiler" - " and the CUDA host compiler to be the same; to set this compiler, please" - " use the CMAKE_CXX_COMPILER variable, not the CMAKE_CUDA_HOST_COMPILER" - " variable.") - endif () -set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER}) -enable_language(CUDA) +cmake_minimum_required(VERSION 3.18) +project(libcudacxx-samples LANGUAGE CXX CUDA) set(CMAKE_BUILD_TYPE "RelWithDebInfo") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --expt-extended-lambda") From e8cc0bb44e874348ae1ebbb444d740c2892dcb31 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Wed, 30 Sep 2020 14:46:41 -0500 Subject: [PATCH 03/28] Revampe benchmark cmake script. --- benchmarks/CMakeLists.txt | 68 +++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 39 deletions(-) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 3a3ba73bef..9c51c50444 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -1,48 +1,38 @@ cmake_minimum_required(VERSION 3.18) -project(libcudacxx-samples LANGUAGE CXX CUDA) + +project(libcudacxx-benchmarks LANGUAGES CXX CUDA) set(CMAKE_BUILD_TYPE "RelWithDebInfo") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --expt-extended-lambda") - -set(HIGHEST_COMPUTE_ARCH 80) -set(KNOWN_COMPUTE_ARCHS 70 72 75 80) - -option(DISABLE_ARCH_BY_DEFAULT "If ON, then all CUDA architectures are disabled on the initial CMake run." OFF) -set(OPTION_INIT ON) -if (DISABLE_ARCH_BY_DEFAULT) - set(OPTION_INIT OFF) -endif () -if (NOT ${HIGHEST_COMPUTE_ARCH} IN_LIST KNOWN_COMPUTE_ARCHS) - message(FATAL_ERROR "When changing the highest compute version, don't forget to add it to the list!") -endif () - -foreach (COMPUTE_ARCH IN LISTS KNOWN_COMPUTE_ARCHS) - option(ENABLE_COMPUTE_${COMPUTE_ARCH} "Enable code generation for tests for sm_${COMPUTE_ARCH}" ${OPTION_INIT}) - if (ENABLE_COMPUTE_${COMPUTE_ARCH}) - set(COMPUTE_ARCHS "${COMPUTE_ARCHS} ${COMPUTE_ARCH}") - set(COMPUTE_MESSAGE "${COMPUTE_MESSAGE} sm_${COMPUTE_ARCH}") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${COMPUTE_ARCH},code=sm_${COMPUTE_ARCH}") - endif () -endforeach () - -option(ENABLE_COMPUTE_FUTURE "Enable code generation for tests for compute_${HIGHEST_COMPUTE_ARCH}" ${OPTION_INIT}) -if (ENABLE_COMPUTE_FUTURE) - set(COMPUTE_MESSAGE "${COMPUTE_MESSAGE} compute_${HIGHEST_COMPUTE_ARCH}") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${HIGHEST_COMPUTE_ARCH},code=compute_${HIGHEST_COMPUTE_ARCH}") -endif () - -message(STATUS "Enabled CUDA architectures:${COMPUTE_MESSAGE}") - -include_directories(${CMAKE_SOURCE_DIR}/../include) + +set(GPU_ARCHS "70;72;75;80" CACHE STRING "List of GPU architectures to be compiled for.") + +message(STATUS "Enabled CUDA architectures:${GPU_ARCHS}") find_package(Threads REQUIRED) find_package(OpenMP) -add_executable(concurrency_host concurrency.cpp) +function(ConfigureHostBench BENCH_NAME BENCH_SRC) + add_executable("${BENCH_NAME}" "${BENCH_SRC}") + target_link_libraries("${BENCH_NAME}" PRIVATE Threads::Threads) + + # TODO: Link against libcudaxx interface target instead + target_include_directories("${BENCH_NAME}" PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../include") + if(OpenMP_CXX_FOUND) + target_link_libraries("${BENCH_NAME}" PRIVATE OpenMP::OpenMP_CXX) + endif() +endfunction(ConfigureHostBench) + +function(ConfigureDeviceBench BENCH_NAME BENCH_SRC) + add_executable("${BENCH_NAME}" "${BENCH_SRC}") + set_property(TARGET "${BENCH_NAME}" PROPERTY CUDA_ARCHITECTURES "${GPU_ARCHS}") + # TODO: Link against libcudaxx interface target instead + target_include_directories("${BENCH_NAME}" PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../include") + target_compile_options("${BENCH_NAME}" PRIVATE --expt-extended-lambda --expt-relaxed-constexpr) +endfunction(ConfigureDeviceBench) + +ConfigureHostBench(concurrency_host concurrency.cpp) -target_link_libraries(concurrency_host PUBLIC Threads::Threads) -if(OpenMP_CXX_FOUND) - target_link_libraries(benchmark_host PUBLIC OpenMP::OpenMP_CXX) -endif() +ConfigureDeviceBench(concurrent_device concurrency.cu) -add_executable(concurrency_device concurrency.cu) From 3c32ceb26611b1ab00d4f1b21c596508c97589be Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Wed, 30 Sep 2020 14:49:27 -0500 Subject: [PATCH 04/28] typo --- benchmarks/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 9c51c50444..f2ebafe007 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -34,5 +34,5 @@ endfunction(ConfigureDeviceBench) ConfigureHostBench(concurrency_host concurrency.cpp) -ConfigureDeviceBench(concurrent_device concurrency.cu) +ConfigureDeviceBench(concurrency_device concurrency.cu) From 60e7fb3162a9cceef21b06eac9870bde68021564 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Wed, 30 Sep 2020 15:11:47 -0500 Subject: [PATCH 05/28] tense. --- benchmarks/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index f2ebafe007..0604ba0c84 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -4,7 +4,7 @@ project(libcudacxx-benchmarks LANGUAGES CXX CUDA) set(CMAKE_BUILD_TYPE "RelWithDebInfo") -set(GPU_ARCHS "70;72;75;80" CACHE STRING "List of GPU architectures to be compiled for.") +set(GPU_ARCHS "70;72;75;80" CACHE STRING "List of GPU architectures to compile for.") message(STATUS "Enabled CUDA architectures:${GPU_ARCHS}") From 325513ede53c01a1e1df459c3ad4447674571873 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 13:30:43 -0500 Subject: [PATCH 06/28] Changed path to text files. --- samples/trie_st.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/trie_st.cpp b/samples/trie_st.cpp index f0fb4f1c92..7c0e313eb8 100644 --- a/samples/trie_st.cpp +++ b/samples/trie_st.cpp @@ -91,8 +91,8 @@ int main() { std::string input; char const* files[] = { - "2600-0.txt", "2701-0.txt", "35-0.txt", "84-0.txt", "8800.txt", - "pg1727.txt", "pg55.txt", "pg6130.txt", "pg996.txt", "1342-0.txt" + "books/2600-0.txt", "books/2701-0.txt", "books/35-0.txt", "books/84-0.txt", "books/8800.txt", + "books/pg1727.txt", "books/pg55.txt", "books/pg6130.txt", "books/pg996.txt", "books/1342-0.txt" }; for(auto* ptr : files) { From 50ad605da7b4e1070f9baa552dd02097ee76a738 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 13:30:57 -0500 Subject: [PATCH 07/28] Add error checking to opening files. --- samples/trie_st.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/samples/trie_st.cpp b/samples/trie_st.cpp index 7c0e313eb8..678c71e252 100644 --- a/samples/trie_st.cpp +++ b/samples/trie_st.cpp @@ -96,8 +96,13 @@ int main() { }; for(auto* ptr : files) { + std::cout << ptr << std::endl; auto const cur = input.size(); std::ifstream in(ptr); + if(in.fail()) { + std::cerr << "Failed to open file: " << ptr << std::endl; + return -1; + } in.seekg(0, std::ios_base::end); auto const pos = in.tellg(); input.resize(cur + pos); From afd58a715766c9dfe1730b958be625befbb01752 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 13:31:12 -0500 Subject: [PATCH 08/28] Add initial cmake for building trie_st. --- samples/CMakeLists.txt | 51 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 samples/CMakeLists.txt diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt new file mode 100644 index 0000000000..b8b43d2ed8 --- /dev/null +++ b/samples/CMakeLists.txt @@ -0,0 +1,51 @@ +cmake_minimum_required(VERSION 3.18) + +project(libcudacxx-examples LANGUAGES CXX CUDA) + +set(CMAKE_BUILD_TYPE "RelWithDebInfo") + +set(GPU_ARCHS "70;72;75;80" CACHE STRING "List of GPU architectures to compile for.") + +message(STATUS "Enabled CUDA architectures:${GPU_ARCHS}") + +find_package(CUDAToolkit REQUIRED) +find_package(Threads REQUIRED) +find_package(OpenMP) + +function(ConfigureHostBench BENCH_NAME BENCH_SRC) + add_executable("${BENCH_NAME}" "${BENCH_SRC}") + target_link_libraries("${BENCH_NAME}" PRIVATE Threads::Threads) + + # TODO: Link against libcudaxx interface target instead + target_include_directories("${BENCH_NAME}" PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../include") + if(OpenMP_CXX_FOUND) + target_link_libraries("${BENCH_NAME}" PRIVATE OpenMP::OpenMP_CXX) + endif() +endfunction(ConfigureHostBench) + +function(ConfigureDeviceBench BENCH_NAME BENCH_SRC) + add_executable("${BENCH_NAME}" "${BENCH_SRC}") + set_property(TARGET "${BENCH_NAME}" PROPERTY CUDA_ARCHITECTURES "${GPU_ARCHS}") + # TODO: Link against libcudaxx interface target instead + target_include_directories("${BENCH_NAME}" PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../include") + target_compile_options("${BENCH_NAME}" PRIVATE --expt-extended-lambda --expt-relaxed-constexpr) +endfunction(ConfigureDeviceBench) + + +add_executable(trie_st trie_st.cpp) +target_compile_features(trie_st PRIVATE cxx_std_11) +execute_process(COMMAND mkdir -p books) +file(DOWNLOAD https://www.gutenberg.org/files/2600/2600-0.txt books/2600-0.txt) +file(DOWNLOAD http://www.gutenberg.org/cache/epub/996/pg996.txt books/pg996.txt) +file(DOWNLOAD http://www.gutenberg.org/cache/epub/55/pg55.txt books/pg55.txt) +file(DOWNLOAD https://www.gutenberg.org/files/8800/8800.txt books/8800.txt) +file(DOWNLOAD https://www.gutenberg.org/files/84/84-0.txt books/84-0.txt) +file(DOWNLOAD http://www.gutenberg.org/cache/epub/6130/pg6130.txt books/pg6130.txt) +file(DOWNLOAD http://www.gutenberg.org/cache/epub/1727/pg1727.txt books/pg1727.txt) +file(DOWNLOAD https://www.gutenberg.org/files/2701/2701-0.txt books/2701-0.txt) +file(DOWNLOAD https://www.gutenberg.org/files/35/35-0.txt books/35-0.txt) +file(DOWNLOAD https://www.gutenberg.org/files/1342/1342-0.txt books/1342-0.txt) + + From cd8ef160523a6531506633636a2fbafc07f8c528 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 13:36:54 -0500 Subject: [PATCH 09/28] Update txt file paths and add error checking to trie_mt. --- samples/trie_mt.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/samples/trie_mt.cpp b/samples/trie_mt.cpp index e59407b0c5..3dc7fd8f20 100644 --- a/samples/trie_mt.cpp +++ b/samples/trie_mt.cpp @@ -120,13 +120,18 @@ int main() { std::string input; char const* files[] = { - "2600-0.txt", "2701-0.txt", "35-0.txt", "84-0.txt", "8800.txt", - "pg1727.txt", "pg55.txt", "pg6130.txt", "pg996.txt", "1342-0.txt" + "books/2600-0.txt", "books/2701-0.txt", "books/35-0.txt", "books/84-0.txt", "books/8800.txt", + "books/pg1727.txt", "books/pg55.txt", "books/pg6130.txt", "books/pg996.txt", "books/1342-0.txt" }; for(auto* ptr : files) { + std::cout << ptr << std::endl; auto const cur = input.size(); std::ifstream in(ptr); + if(in.fail()) { + std::cerr << "Failed to open file: " << ptr << std::endl; + return -1; + } in.seekg(0, std::ios_base::end); auto const pos = in.tellg(); input.resize(cur + pos); From e642ce13dbaea619690c8f0be09fe67cfcd08fd3 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 13:37:05 -0500 Subject: [PATCH 10/28] Add cmake config for trie_mt. --- samples/CMakeLists.txt | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index b8b43d2ed8..51be530b4c 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -37,15 +37,19 @@ endfunction(ConfigureDeviceBench) add_executable(trie_st trie_st.cpp) target_compile_features(trie_st PRIVATE cxx_std_11) execute_process(COMMAND mkdir -p books) -file(DOWNLOAD https://www.gutenberg.org/files/2600/2600-0.txt books/2600-0.txt) -file(DOWNLOAD http://www.gutenberg.org/cache/epub/996/pg996.txt books/pg996.txt) -file(DOWNLOAD http://www.gutenberg.org/cache/epub/55/pg55.txt books/pg55.txt) -file(DOWNLOAD https://www.gutenberg.org/files/8800/8800.txt books/8800.txt) -file(DOWNLOAD https://www.gutenberg.org/files/84/84-0.txt books/84-0.txt) -file(DOWNLOAD http://www.gutenberg.org/cache/epub/6130/pg6130.txt books/pg6130.txt) -file(DOWNLOAD http://www.gutenberg.org/cache/epub/1727/pg1727.txt books/pg1727.txt) -file(DOWNLOAD https://www.gutenberg.org/files/2701/2701-0.txt books/2701-0.txt) -file(DOWNLOAD https://www.gutenberg.org/files/35/35-0.txt books/35-0.txt) -file(DOWNLOAD https://www.gutenberg.org/files/1342/1342-0.txt books/1342-0.txt) +file(DOWNLOAD https://www.gutenberg.org/files/2600/2600-0.txt books/2600-0.txt SHOW_PROGRESS) +file(DOWNLOAD http://www.gutenberg.org/cache/epub/996/pg996.txt books/pg996.txt SHOW_PROGRESS) +file(DOWNLOAD http://www.gutenberg.org/cache/epub/55/pg55.txt books/pg55.txt SHOW_PROGRESS) +file(DOWNLOAD https://www.gutenberg.org/files/8800/8800.txt books/8800.txt SHOW_PROGRESS) +file(DOWNLOAD https://www.gutenberg.org/files/84/84-0.txt books/84-0.txt SHOW_PROGRESS) +file(DOWNLOAD http://www.gutenberg.org/cache/epub/6130/pg6130.txt books/pg6130.txt SHOW_PROGRESS) +file(DOWNLOAD http://www.gutenberg.org/cache/epub/1727/pg1727.txt books/pg1727.txt SHOW_PROGRESS) +file(DOWNLOAD https://www.gutenberg.org/files/2701/2701-0.txt books/2701-0.txt SHOW_PROGRESS) +file(DOWNLOAD https://www.gutenberg.org/files/35/35-0.txt books/35-0.txt SHOW_PROGRESS) +file(DOWNLOAD https://www.gutenberg.org/files/1342/1342-0.txt books/1342-0.txt SHOW_PROGRESS) + +add_executable(trie_mt trie_mt.cpp) +target_compile_features(trie_mt PRIVATE cxx_std_11) +target_link_libraries(trie_mt Threads::Threads) From e504db4fa85349785e2e07c9572f96005f8be515 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 13:41:13 -0500 Subject: [PATCH 11/28] Update paths and error checking for trie.cu. --- samples/trie.cu | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/samples/trie.cu b/samples/trie.cu index 9abef0763e..ba8f99b47e 100644 --- a/samples/trie.cu +++ b/samples/trie.cu @@ -179,13 +179,18 @@ int main() { std::basic_string, managed_allocator> input; char const* files[] = { - "2600-0.txt", "2701-0.txt", "35-0.txt", "84-0.txt", "8800.txt", - "pg1727.txt", "pg55.txt", "pg6130.txt", "pg996.txt", "1342-0.txt" + "books/2600-0.txt", "books/2701-0.txt", "books/35-0.txt", "books/84-0.txt", "books/8800.txt", + "books/pg1727.txt", "books/pg55.txt", "books/pg6130.txt", "books/pg996.txt", "books/1342-0.txt" }; for(auto* ptr : files) { + std::cout << ptr << std::endl; auto const cur = input.size(); std::ifstream in(ptr); + if(in.fail()) { + std::cerr << "Failed to open file: " << ptr << std::endl; + return -1; + } in.seekg(0, std::ios_base::end); auto const pos = in.tellg(); input.resize(cur + pos); From acf3db30af21ce69b0289330533c5db4e65093a3 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 13:41:30 -0500 Subject: [PATCH 12/28] Only download books if they don't already exists. --- samples/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 51be530b4c..425c5683c7 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -36,7 +36,9 @@ endfunction(ConfigureDeviceBench) add_executable(trie_st trie_st.cpp) target_compile_features(trie_st PRIVATE cxx_std_11) -execute_process(COMMAND mkdir -p books) + +if(NOT (EXISTS books)) + execute_process(COMMAND mkdir books) file(DOWNLOAD https://www.gutenberg.org/files/2600/2600-0.txt books/2600-0.txt SHOW_PROGRESS) file(DOWNLOAD http://www.gutenberg.org/cache/epub/996/pg996.txt books/pg996.txt SHOW_PROGRESS) file(DOWNLOAD http://www.gutenberg.org/cache/epub/55/pg55.txt books/pg55.txt SHOW_PROGRESS) @@ -47,6 +49,7 @@ file(DOWNLOAD http://www.gutenberg.org/cache/epub/1727/pg1727.txt books/pg1727.t file(DOWNLOAD https://www.gutenberg.org/files/2701/2701-0.txt books/2701-0.txt SHOW_PROGRESS) file(DOWNLOAD https://www.gutenberg.org/files/35/35-0.txt books/35-0.txt SHOW_PROGRESS) file(DOWNLOAD https://www.gutenberg.org/files/1342/1342-0.txt books/1342-0.txt SHOW_PROGRESS) +endif() add_executable(trie_mt trie_mt.cpp) target_compile_features(trie_mt PRIVATE cxx_std_11) From f2e0eaa308674bb1ffc96b46377e1c97955df221 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 13:41:37 -0500 Subject: [PATCH 13/28] Add config for trie. --- samples/CMakeLists.txt | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 425c5683c7..28e0d66339 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -39,20 +39,25 @@ target_compile_features(trie_st PRIVATE cxx_std_11) if(NOT (EXISTS books)) execute_process(COMMAND mkdir books) -file(DOWNLOAD https://www.gutenberg.org/files/2600/2600-0.txt books/2600-0.txt SHOW_PROGRESS) -file(DOWNLOAD http://www.gutenberg.org/cache/epub/996/pg996.txt books/pg996.txt SHOW_PROGRESS) -file(DOWNLOAD http://www.gutenberg.org/cache/epub/55/pg55.txt books/pg55.txt SHOW_PROGRESS) -file(DOWNLOAD https://www.gutenberg.org/files/8800/8800.txt books/8800.txt SHOW_PROGRESS) -file(DOWNLOAD https://www.gutenberg.org/files/84/84-0.txt books/84-0.txt SHOW_PROGRESS) -file(DOWNLOAD http://www.gutenberg.org/cache/epub/6130/pg6130.txt books/pg6130.txt SHOW_PROGRESS) -file(DOWNLOAD http://www.gutenberg.org/cache/epub/1727/pg1727.txt books/pg1727.txt SHOW_PROGRESS) -file(DOWNLOAD https://www.gutenberg.org/files/2701/2701-0.txt books/2701-0.txt SHOW_PROGRESS) -file(DOWNLOAD https://www.gutenberg.org/files/35/35-0.txt books/35-0.txt SHOW_PROGRESS) -file(DOWNLOAD https://www.gutenberg.org/files/1342/1342-0.txt books/1342-0.txt SHOW_PROGRESS) + file(DOWNLOAD https://www.gutenberg.org/files/2600/2600-0.txt books/2600-0.txt SHOW_PROGRESS) + file(DOWNLOAD http://www.gutenberg.org/cache/epub/996/pg996.txt books/pg996.txt SHOW_PROGRESS) + file(DOWNLOAD http://www.gutenberg.org/cache/epub/55/pg55.txt books/pg55.txt SHOW_PROGRESS) + file(DOWNLOAD https://www.gutenberg.org/files/8800/8800.txt books/8800.txt SHOW_PROGRESS) + file(DOWNLOAD https://www.gutenberg.org/files/84/84-0.txt books/84-0.txt SHOW_PROGRESS) + file(DOWNLOAD http://www.gutenberg.org/cache/epub/6130/pg6130.txt books/pg6130.txt SHOW_PROGRESS) + file(DOWNLOAD http://www.gutenberg.org/cache/epub/1727/pg1727.txt books/pg1727.txt SHOW_PROGRESS) + file(DOWNLOAD https://www.gutenberg.org/files/2701/2701-0.txt books/2701-0.txt SHOW_PROGRESS) + file(DOWNLOAD https://www.gutenberg.org/files/35/35-0.txt books/35-0.txt SHOW_PROGRESS) + file(DOWNLOAD https://www.gutenberg.org/files/1342/1342-0.txt books/1342-0.txt SHOW_PROGRESS) endif() add_executable(trie_mt trie_mt.cpp) target_compile_features(trie_mt PRIVATE cxx_std_11) target_link_libraries(trie_mt Threads::Threads) +add_executable(trie_cuda trie.cu) +target_compile_features(trie_cuda PRIVATE cxx_std_11 cuda_std_11) +target_compile_options(trie_cuda PRIVATE --expt-relaxed-constexpr) +set_property(TARGET trie_cuda PROPERTY CUDA_ARCHITECTURES 70) + From 9ca2303bceac926984433debddca0027bd4b52bb Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 13:51:58 -0500 Subject: [PATCH 14/28] Add cmake config for trie.cu. --- samples/CMakeLists.txt | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 28e0d66339..095a65c89e 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -55,9 +55,13 @@ add_executable(trie_mt trie_mt.cpp) target_compile_features(trie_mt PRIVATE cxx_std_11) target_link_libraries(trie_mt Threads::Threads) -add_executable(trie_cuda trie.cu) -target_compile_features(trie_cuda PRIVATE cxx_std_11 cuda_std_11) -target_compile_options(trie_cuda PRIVATE --expt-relaxed-constexpr) -set_property(TARGET trie_cuda PROPERTY CUDA_ARCHITECTURES 70) +if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.1) + add_executable(trie_cuda trie.cu) + target_compile_features(trie_cuda PRIVATE cxx_std_11 cuda_std_11) + target_compile_options(trie_cuda PRIVATE --expt-relaxed-constexpr) + set_property(TARGET trie_cuda PROPERTY CUDA_ARCHITECTURES 70) +else() + message(STATUS "Insufficient CUDA version. Skipping trie.cu example.") +endif() From 32ae0d75ba4808809ea0929d87aa0052353e3a14 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 14:09:53 -0500 Subject: [PATCH 15/28] Delete old scripts. --- samples/books.sh | 11 ----------- samples/linux.sh | 6 ------ samples/linux_clang.sh | 2 -- samples/linux_rtc.sh | 2 -- samples/openmp.sh | 1 - samples/windows.bat | 4 ---- 6 files changed, 26 deletions(-) delete mode 100755 samples/books.sh delete mode 100755 samples/linux.sh delete mode 100755 samples/linux_clang.sh delete mode 100755 samples/linux_rtc.sh delete mode 100755 samples/openmp.sh delete mode 100644 samples/windows.bat diff --git a/samples/books.sh b/samples/books.sh deleted file mode 100755 index 434119267d..0000000000 --- a/samples/books.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -curl -O -# https://www.gutenberg.org/files/2600/2600-0.txt -curl -O -# http://www.gutenberg.org/cache/epub/996/pg996.txt -curl -O -# http://www.gutenberg.org/cache/epub/55/pg55.txt -curl -O -# https://www.gutenberg.org/files/8800/8800.txt -curl -O -# https://www.gutenberg.org/files/84/84-0.txt -curl -O -# http://www.gutenberg.org/cache/epub/6130/pg6130.txt -curl -O -# http://www.gutenberg.org/cache/epub/1727/pg1727.txt -curl -O -# https://www.gutenberg.org/files/2701/2701-0.txt -curl -O -# https://www.gutenberg.org/files/35/35-0.txt -curl -O -# https://www.gutenberg.org/files/1342/1342-0.txt diff --git a/samples/linux.sh b/samples/linux.sh deleted file mode 100755 index abde3255b3..0000000000 --- a/samples/linux.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash -g++ -std=c++11 trie_st.cpp -O2 -o trie_st -g++ -std=c++11 trie_mt.cpp -O2 -o trie_mt -pthread -nvcc -I../include -arch=compute_70 -std=c++11 -O2 trie.cu --expt-relaxed-constexpr -o trie -g++ -I../include -std=c++14 benchmark.cpp -O2 -lpthread -o benchmark -nvcc -I../include -arch=compute_70 -std=c++14 benchmark.cu -O2 -lpthread --expt-relaxed-constexpr --expt-extended-lambda -o benchmark diff --git a/samples/linux_clang.sh b/samples/linux_clang.sh deleted file mode 100755 index 852f12f504..0000000000 --- a/samples/linux_clang.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -clang++-7 -I../include --cuda-gpu-arch=sm_70 -std=c++11 -O2 trie.cu -L/usr/local/cuda/lib64/ -lcudart_static -pthread -ldl -lrt -o trie diff --git a/samples/linux_rtc.sh b/samples/linux_rtc.sh deleted file mode 100755 index 0d9e08716e..0000000000 --- a/samples/linux_rtc.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -nvcc rtc_example.cpp -lnvrtc -o rtc; ./rtc diff --git a/samples/openmp.sh b/samples/openmp.sh deleted file mode 100755 index 2950adf1d9..0000000000 --- a/samples/openmp.sh +++ /dev/null @@ -1 +0,0 @@ -clang++ -D_LIBCPP_BARRIER_BUTTERFLY -I../include -fopenmp=libomp -L../../llvm-project/build/lib/ -std=c++11 -O2 benchmark.cpp -lstdc++ -lpthread -lm -o benchmark diff --git a/samples/windows.bat b/samples/windows.bat deleted file mode 100644 index 176a7a01a3..0000000000 --- a/samples/windows.bat +++ /dev/null @@ -1,4 +0,0 @@ -call vcvars64.bat -cl /EHsc trie_st.cpp /O2 -cl /EHsc trie_mt.cpp /O2 -nvcc -I../include -arch=compute_70 -O2 trie.cu --expt-relaxed-constexpr -Xcompiler /Zc:__cplusplus -o trie From bbde1ee4b704295dde544137926ed9989135d3d0 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 14:10:08 -0500 Subject: [PATCH 16/28] Update include paths. --- samples/rtc_example.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/rtc_example.cpp b/samples/rtc_example.cpp index 85fb047d3a..358f14d408 100644 --- a/samples/rtc_example.cpp +++ b/samples/rtc_example.cpp @@ -39,9 +39,9 @@ THE SOFTWARE. const char *trie = R"xxx( -#include -#include -#include +#include +#include +#include template static constexpr T min(T a, T b) { return a < b ? a : b; } From 39a06457107d118ae72f338946bf6a2820c36129 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 14:10:27 -0500 Subject: [PATCH 17/28] Remove hardcoded paths in nvrtc example. --- samples/rtc_example.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/samples/rtc_example.cpp b/samples/rtc_example.cpp index 358f14d408..b9e7dd8ad9 100644 --- a/samples/rtc_example.cpp +++ b/samples/rtc_example.cpp @@ -128,14 +128,12 @@ int main(int argc, char *argv[]) const char *opts[] = {"-std=c++11", "-I/usr/include/linux", - "-I/usr/include/c++/7.3.0", "-I/usr/local/cuda/include", - "-I/home/olivier/freestanding/include", "--gpu-architecture=compute_70", "--relocatable-device-code=true", "-default-device"}; nvrtcResult compileResult = nvrtcCompileProgram(prog, // prog - 8, // numOptions + 6, // numOptions opts); // options // Obtain compilation log from the program. size_t logSize; From f2ecd553b4e81cd61e2b7d35bb90a00dd2bbfece Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 14:11:45 -0500 Subject: [PATCH 18/28] Add cmake config for nvrtc example. --- samples/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 095a65c89e..bf9739a55b 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -33,7 +33,6 @@ function(ConfigureDeviceBench BENCH_NAME BENCH_SRC) target_compile_options("${BENCH_NAME}" PRIVATE --expt-extended-lambda --expt-relaxed-constexpr) endfunction(ConfigureDeviceBench) - add_executable(trie_st trie_st.cpp) target_compile_features(trie_st PRIVATE cxx_std_11) @@ -65,3 +64,8 @@ else() endif() +add_executable(rtc rtc_example.cpp) +target_link_libraries(rtc CUDA::nvrtc) +target_compile_features(rtc PRIVATE cxx_std_11) + + From e6688a485b388fed8484f8286dc007f45fce39e6 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 14:12:31 -0500 Subject: [PATCH 19/28] Remove unused cmake stuff. --- samples/CMakeLists.txt | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index bf9739a55b..95237157b2 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -4,35 +4,10 @@ project(libcudacxx-examples LANGUAGES CXX CUDA) set(CMAKE_BUILD_TYPE "RelWithDebInfo") -set(GPU_ARCHS "70;72;75;80" CACHE STRING "List of GPU architectures to compile for.") - -message(STATUS "Enabled CUDA architectures:${GPU_ARCHS}") - find_package(CUDAToolkit REQUIRED) find_package(Threads REQUIRED) find_package(OpenMP) -function(ConfigureHostBench BENCH_NAME BENCH_SRC) - add_executable("${BENCH_NAME}" "${BENCH_SRC}") - target_link_libraries("${BENCH_NAME}" PRIVATE Threads::Threads) - - # TODO: Link against libcudaxx interface target instead - target_include_directories("${BENCH_NAME}" PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/../include") - if(OpenMP_CXX_FOUND) - target_link_libraries("${BENCH_NAME}" PRIVATE OpenMP::OpenMP_CXX) - endif() -endfunction(ConfigureHostBench) - -function(ConfigureDeviceBench BENCH_NAME BENCH_SRC) - add_executable("${BENCH_NAME}" "${BENCH_SRC}") - set_property(TARGET "${BENCH_NAME}" PROPERTY CUDA_ARCHITECTURES "${GPU_ARCHS}") - # TODO: Link against libcudaxx interface target instead - target_include_directories("${BENCH_NAME}" PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/../include") - target_compile_options("${BENCH_NAME}" PRIVATE --expt-extended-lambda --expt-relaxed-constexpr) -endfunction(ConfigureDeviceBench) - add_executable(trie_st trie_st.cpp) target_compile_features(trie_st PRIVATE cxx_std_11) From 218a03337eccff405bdb33360594c023708bde24 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 14:13:12 -0500 Subject: [PATCH 20/28] Doc. --- samples/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 95237157b2..c05d7910d9 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -11,6 +11,7 @@ find_package(OpenMP) add_executable(trie_st trie_st.cpp) target_compile_features(trie_st PRIVATE cxx_std_11) +# Download input files for the trie examples. if(NOT (EXISTS books)) execute_process(COMMAND mkdir books) file(DOWNLOAD https://www.gutenberg.org/files/2600/2600-0.txt books/2600-0.txt SHOW_PROGRESS) From ec2a4b6b3bb6dbf35ab55e17eeea3ffe063b4fc7 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Thu, 1 Oct 2020 14:13:30 -0500 Subject: [PATCH 21/28] Format. --- samples/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index c05d7910d9..1b2c813123 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -39,9 +39,6 @@ else() message(STATUS "Insufficient CUDA version. Skipping trie.cu example.") endif() - add_executable(rtc rtc_example.cpp) target_link_libraries(rtc CUDA::nvrtc) target_compile_features(rtc PRIVATE cxx_std_11) - - From 20a702c9f2b9ca7e6a79ab404952bf652463fd45 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Fri, 2 Oct 2020 09:48:03 -0500 Subject: [PATCH 22/28] Replace simt namespace with cuda. --- samples/rtc_example.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/samples/rtc_example.cpp b/samples/rtc_example.cpp index b9e7dd8ad9..43d33b3d23 100644 --- a/samples/rtc_example.cpp +++ b/samples/rtc_example.cpp @@ -47,11 +47,11 @@ template static constexpr T min(T a, T b) { return a < b ? a : b; } struct trie { struct ref { - simt::std::atomic ptr = ATOMIC_VAR_INIT(nullptr); + cuda::std::atomic ptr = ATOMIC_VAR_INIT(nullptr); // the flag will protect against multiple pointer updates - simt::std::atomic_flag flag = ATOMIC_FLAG_INIT; + cuda::std::atomic_flag flag = ATOMIC_FLAG_INIT; } next[26]; - simt::std::atomic count = ATOMIC_VAR_INIT(0); + cuda::std::atomic count = ATOMIC_VAR_INIT(0); }; __host__ __device__ int index_of(char c) { @@ -61,7 +61,7 @@ int index_of(char c) { }; __host__ __device__ void make_trie(/* trie to insert word counts into */ trie& root, - /* bump allocator to get new nodes*/ simt::std::atomic& bump, + /* bump allocator to get new nodes*/ cuda::std::atomic& bump, /* input */ const char* begin, const char* end, /* thread this invocation is for */ unsigned index, /* how many threads there are */ unsigned domain) { @@ -80,7 +80,7 @@ void make_trie(/* trie to insert word counts into */ trie& root, auto const index = off >= size ? -1 : index_of(c); if(index == -1) { if(n != &root) { - n->count.fetch_add(1, simt::std::memory_order_relaxed); + n->count.fetch_add(1, cuda::std::memory_order_relaxed); n = &root; } //end of last word? @@ -89,20 +89,20 @@ void make_trie(/* trie to insert word counts into */ trie& root, else continue; } - if(n->next[index].ptr.load(simt::std::memory_order_acquire) == nullptr) { - if(n->next[index].flag.test_and_set(simt::std::memory_order_relaxed)) - while(n->next[index].ptr.load(simt::std::memory_order_acquire) == nullptr); + if(n->next[index].ptr.load(cuda::std::memory_order_acquire) == nullptr) { + if(n->next[index].flag.test_and_set(cuda::std::memory_order_relaxed)) + while(n->next[index].ptr.load(cuda::std::memory_order_acquire) == nullptr); else { - auto next = bump.fetch_add(1, simt::std::memory_order_relaxed); - n->next[index].ptr.store(next, simt::std::memory_order_release); + auto next = bump.fetch_add(1, cuda::std::memory_order_relaxed); + n->next[index].ptr.store(next, cuda::std::memory_order_release); } } - n = n->next[index].ptr.load(simt::std::memory_order_relaxed); + n = n->next[index].ptr.load(cuda::std::memory_order_relaxed); } } __global__ // __launch_bounds__(1024, 1) -void call_make_trie(trie* t, simt::std::atomic* bump, const char* begin, const char* end) { +void call_make_trie(trie* t, cuda::std::atomic* bump, const char* begin, const char* end) { auto const index = blockDim.x * blockIdx.x + threadIdx.x; auto const domain = gridDim.x * blockDim.x; From dff74e8136aaca422fd1c380e1ded83d2cd4a928 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Fri, 2 Oct 2020 09:48:22 -0500 Subject: [PATCH 23/28] Point nvrtc include path to relative location of libcu++. --- samples/rtc_example.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/rtc_example.cpp b/samples/rtc_example.cpp index 43d33b3d23..b97376ae4e 100644 --- a/samples/rtc_example.cpp +++ b/samples/rtc_example.cpp @@ -127,8 +127,8 @@ int main(int argc, char *argv[]) NULL)); // includeNames const char *opts[] = {"-std=c++11", - "-I/usr/include/linux", "-I/usr/local/cuda/include", + "-I../../include", "--gpu-architecture=compute_70", "--relocatable-device-code=true", "-default-device"}; From e291950f656c10886df2848d57629a475cb7549a Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Fri, 2 Oct 2020 09:50:22 -0500 Subject: [PATCH 24/28] Reorganize cmake file. --- samples/CMakeLists.txt | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 1b2c813123..ef7ed78e1d 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -8,9 +8,6 @@ find_package(CUDAToolkit REQUIRED) find_package(Threads REQUIRED) find_package(OpenMP) -add_executable(trie_st trie_st.cpp) -target_compile_features(trie_st PRIVATE cxx_std_11) - # Download input files for the trie examples. if(NOT (EXISTS books)) execute_process(COMMAND mkdir books) @@ -26,6 +23,9 @@ if(NOT (EXISTS books)) file(DOWNLOAD https://www.gutenberg.org/files/1342/1342-0.txt books/1342-0.txt SHOW_PROGRESS) endif() +add_executable(trie_st trie_st.cpp) +target_compile_features(trie_st PRIVATE cxx_std_11) + add_executable(trie_mt trie_mt.cpp) target_compile_features(trie_mt PRIVATE cxx_std_11) target_link_libraries(trie_mt Threads::Threads) @@ -39,6 +39,10 @@ else() message(STATUS "Insufficient CUDA version. Skipping trie.cu example.") endif() -add_executable(rtc rtc_example.cpp) -target_link_libraries(rtc CUDA::nvrtc) -target_compile_features(rtc PRIVATE cxx_std_11) +if(CUDAToolkit_VERSION VERSION_GREATER 10.2) + add_executable(rtc rtc_example.cpp) + target_link_libraries(rtc CUDA::nvrtc) + target_compile_features(rtc PRIVATE cxx_std_11) +else() + message(STATUS "Insufficient CUDA version. Skipping rtc_example.cpp example.") +endif() From e53370bc30adbfb2ac524f15a04d23879411b24b Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach aka wash Date: Fri, 16 Oct 2020 11:40:34 -0700 Subject: [PATCH 25/28] Add concurrent hash table example. --- samples/concurrent_hash_table.cu | 260 +++++++++++++++++++++++++++++++ 1 file changed, 260 insertions(+) create mode 100644 samples/concurrent_hash_table.cu diff --git a/samples/concurrent_hash_table.cu b/samples/concurrent_hash_table.cu new file mode 100644 index 0000000000..f41c6b692a --- /dev/null +++ b/samples/concurrent_hash_table.cu @@ -0,0 +1,260 @@ +// Copyright (c) 2018-2020 NVIDIA Corporation +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Released under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + +#include +#include + +// TODO: It would be great if this example could NOT depend on Thrust. +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +// TODO: This should be upstreamed and then removed. +namespace thrust { + +using universal_raw_memory_resource = + thrust::system::cuda::detail::cuda_memory_resource< + thrust::system::cuda::detail::cudaMallocManaged, cudaFree, void* + >; + +template +using universal_allocator = + thrust::mr::stateless_resource_allocator; + +template +using universal_vector = thrust::device_vector>; + +} // thrust + +template < + typename Key, typename Value, + typename Hash = thrust::identity, + typename KeyEqual = thrust::equal_to, + typename MemoryResource = thrust::universal_raw_memory_resource +> +struct concurrent_hash_table { + // Elements transition from state_empty -> state_reserved -> + // state_filled; no other transitions are allowed. + enum state_type { + state_empty, state_reserved, state_filled + }; + + using key_type = Key; + using mapped_type = Value; + using size_type = cuda::std::uint64_t; + + using key_allocator = thrust::mr::stateless_resource_allocator< + key_type, MemoryResource + >; + using mapped_allocator = thrust::mr::stateless_resource_allocator< + mapped_type, MemoryResource + >; + using state_allocator = thrust::mr::stateless_resource_allocator< + cuda::std::atomic, MemoryResource + >; + + using key_iterator = typename key_allocator::pointer; + using value_iterator = typename mapped_allocator::pointer; + using state_iterator = typename state_allocator::pointer; + + // This whole thing is silly and should be a lambda, or at least a private + // nested class, but alas, NVCC doesn't like that. + struct element_destroyer { + private: + size_type capacity_; + key_iterator keys_; + value_iterator values_; + state_iterator states_; + + public: + __host__ __device__ + element_destroyer(size_type capacity, + key_iterator keys, + value_iterator values, + state_iterator states) + : capacity_(capacity), keys_(keys), values_(values), states_(states) + {} + + element_destroyer(element_destroyer const&) = default; + + __host__ __device__ + void operator()(size_type i) { + if (state_empty != states_[i]) { + (keys_ + i)->~key_type(); + (values_ + i)->~mapped_type(); + } + } + }; + +private: + size_type capacity_; + key_iterator keys_; + value_iterator values_; + state_iterator states_; + Hash hash_; + KeyEqual key_equal_; + +public: + __host__ + concurrent_hash_table(size_type capacity, + Hash hash = Hash(), + KeyEqual key_equal = KeyEqual()) + : capacity_(capacity) + , keys_(key_allocator{}.allocate(capacity_)) + , values_(mapped_allocator{}.allocate(capacity_)) + , states_(state_allocator{}.allocate(capacity_)) + , hash_(std::move(hash)) + , key_equal_(std::move(key_equal)) + { + thrust::uninitialized_fill(thrust::device, + states_, states_ + capacity_, + state_empty); + } + + __host__ + ~concurrent_hash_table() + { + thrust::for_each(thrust::device, + thrust::counting_iterator(0), + thrust::counting_iterator(capacity_), + element_destroyer(capacity_, keys_, values_, states_)); + } + + // TODO: Change return type to an enum with three possible values, succeeded, + // exists, and full. + template + __host__ __device__ + thrust::pair + try_emplace(UKey&& key, Args&&... args) { + auto index{hash_(key) % capacity_}; + // Linearly probe the storage space up to `capacity_` times; if we haven't + // succeeded by then, the container is full. + for (size_type i = 0; i < capacity_; ++i) { + state_type old = states_[index].load(cuda::std::memory_order_acquire); + while (old == state_empty) { + // As long as the state of this element is empty, attempt to set it to + // reserved. + if (states_[index].compare_exchange_weak( + old, state_reserved, cuda::std::memory_order_acq_rel)) + { + // We succeeded; the element is now "locked" as reserved. + new (keys_ + index) key_type(std::forward(key)); + new (values_ + index) mapped_type(std::forward(args)...); + states_[index].store(state_filled, cuda::std::memory_order_release); + return thrust::make_pair(values_ + index, true); + } + } + // If we are here, the element we are probing is not empty and we didn't + // fill it, so we need to wait for it to be filled. + while (state_filled != states_[index].load(cuda::std::memory_order_acquire)) + ; + // Now we know that the element we are probing has been filled by someone + // else, so we check if our key is equal to it. + if (key_equal_(keys_[index], key)) + // It is, so the element already exists. + return thrust::make_pair(values_ + index, false); + // Otherwise, the element isn't a match, so move on to the next element. + index = (index + 1) % capacity_; + } + // If we are here, the container is full. + return thrust::make_pair(value_iterator{}, false); + } + + __host__ __device__ + mapped_type& operator[](key_type const& key) { + return (*try_emplace(key).first); + } + __host__ __device__ + mapped_type& operator[](key_type&& key) { + return (*try_emplace(std::move(key)).first); + } +}; + +template +struct identity_modulo { +private: + T const modulo_; + +public: + __host__ __device__ + identity_modulo(T modulo) : modulo_(std::move(modulo)) {} + + identity_modulo(identity_modulo const&) = default; + + __host__ __device__ + T operator()(T i) { return i % modulo_; } +}; + +int main() { + { + using table = concurrent_hash_table>; + + auto freq = thrust::allocate_unique(thrust::universal_allocator
{}, 8); + + thrust::universal_vector input = [] { + thrust::universal_vector v(2048); + std::mt19937 gen(1337); + std::uniform_int_distribution dis(0, 7); + thrust::generate(v.begin(), v.end(), [&] { return dis(gen); }); + return v; + }(); + + thrust::for_each(thrust::device, input.begin(), input.end(), + [freq = freq.get()] __device__ (int i) { + (*freq)[i].fetch_add(1, cuda::std::memory_order_relaxed); + } + ); + + thrust::host_vector gold(8); + thrust::for_each(input.begin(), input.end(), [&] (int i) { ++gold[i]; }); + + for (cuda::std::uint64_t i = 0; i < 8; ++i) + std::cout << "i: " << i + << " gold: " << gold[i] + << " observed: " << (*freq)[i] << "\n"; + + assert(cudaSuccess == cudaDeviceSynchronize()); + } + { + using table = concurrent_hash_table, identity_modulo>; + + auto freq = thrust::allocate_unique
(thrust::universal_allocator
{}, 8, identity_modulo(4)); + + thrust::universal_vector input = [] { + thrust::universal_vector v(2048); + std::mt19937 gen(1337); + std::uniform_int_distribution dis(0, 7); + thrust::generate(v.begin(), v.end(), [&] { return dis(gen); }); + return v; + }(); + + thrust::for_each(thrust::device, input.begin(), input.end(), + [freq = freq.get()] __device__ (int i) { + (*freq)[i].fetch_add(1, cuda::std::memory_order_relaxed); + } + ); + + thrust::host_vector gold(8); + thrust::for_each(input.begin(), input.end(), [&] (int i) { ++gold[i]; }); + + for (cuda::std::uint64_t i = 0; i < 8; ++i) + std::cout << "i: " << i + << " gold: " << gold[i] + << " observed: " << (*freq)[i] << "\n"; + + assert(cudaSuccess == cudaDeviceSynchronize()); + } +} + From ba3538eb06232442a6e9c75e19142448e6cb0603 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Tue, 20 Oct 2020 14:20:12 -0500 Subject: [PATCH 26/28] rename samples -> examples. --- {samples => examples}/CMakeLists.txt | 0 {samples => examples}/concurrent_hash_table.cu | 0 {samples => examples}/rtc_example.cpp | 0 {samples => examples}/trie.cu | 0 {samples => examples}/trie_mt.cpp | 0 {samples => examples}/trie_st.cpp | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename {samples => examples}/CMakeLists.txt (100%) rename {samples => examples}/concurrent_hash_table.cu (100%) rename {samples => examples}/rtc_example.cpp (100%) rename {samples => examples}/trie.cu (100%) rename {samples => examples}/trie_mt.cpp (100%) rename {samples => examples}/trie_st.cpp (100%) diff --git a/samples/CMakeLists.txt b/examples/CMakeLists.txt similarity index 100% rename from samples/CMakeLists.txt rename to examples/CMakeLists.txt diff --git a/samples/concurrent_hash_table.cu b/examples/concurrent_hash_table.cu similarity index 100% rename from samples/concurrent_hash_table.cu rename to examples/concurrent_hash_table.cu diff --git a/samples/rtc_example.cpp b/examples/rtc_example.cpp similarity index 100% rename from samples/rtc_example.cpp rename to examples/rtc_example.cpp diff --git a/samples/trie.cu b/examples/trie.cu similarity index 100% rename from samples/trie.cu rename to examples/trie.cu diff --git a/samples/trie_mt.cpp b/examples/trie_mt.cpp similarity index 100% rename from samples/trie_mt.cpp rename to examples/trie_mt.cpp diff --git a/samples/trie_st.cpp b/examples/trie_st.cpp similarity index 100% rename from samples/trie_st.cpp rename to examples/trie_st.cpp From 818d2b1a6b374da6b121d31aa5c3412e3eea56b9 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Tue, 20 Oct 2020 14:31:37 -0500 Subject: [PATCH 27/28] Add hash map to cmake file. --- examples/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index ef7ed78e1d..5347513325 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -46,3 +46,8 @@ if(CUDAToolkit_VERSION VERSION_GREATER 10.2) else() message(STATUS "Insufficient CUDA version. Skipping rtc_example.cpp example.") endif() + +add_executable(hash_map concurrent_hash_table.cu) +target_compile_features(hash_map PRIVATE cxx_std_14 cuda_std_14) +set_property(TARGET hash_map PROPERTY CUDA_ARCHITECTURES 70) +target_compile_options(hash_map PRIVATE --expt-extended-lambda) From 1b716a13444345c70b547842da2c32745a07af6f Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach aka wash Date: Thu, 29 Oct 2020 12:39:54 -0700 Subject: [PATCH 28/28] Docs: Update link in README.md to point to the new examples directory. --- docs/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/readme.md b/docs/readme.md index 5cd8622844..f11b60e0d8 100644 --- a/docs/readme.md +++ b/docs/readme.md @@ -1,7 +1,7 @@ # libcu++: The C++ Standard Library for Your Entire System
- +
ExamplesExamples Godbolt Documentation