From 4a25a6bb789b84456e9f220d5978b9c10bcbdf49 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Fri, 12 Aug 2022 16:23:48 +0200 Subject: [PATCH 1/9] Fixing race condition in cell permute 2 with NVHPC 22.5 / 22.7 (#847) * Fixing race condition in cell permute 2 : OpenACC [performance optimization] * Update Unit Test / Documentation --- coreneuron/permute/cellorder.cpp | 56 +++++++++++++------------------ tests/unit/solver/test_solver.cpp | 12 +++++-- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/coreneuron/permute/cellorder.cpp b/coreneuron/permute/cellorder.cpp index 14feb31de..c95fedcf2 100644 --- a/coreneuron/permute/cellorder.cpp +++ b/coreneuron/permute/cellorder.cpp @@ -478,13 +478,12 @@ static void bksub_interleaved(NrnThread* nt, } // icore ranges [0:warpsize) ; stride[ncycle] +nrn_pragma_acc(routine vector) static void triang_interleaved2(NrnThread* nt, int icore, int ncycle, int* stride, int lastnode) { int icycle = ncycle - 1; int istride = stride[icycle]; int i = lastnode - istride + icore; -#ifndef CORENEURON_ENABLE_GPU int ii = i; -#endif // execute until all tree depths are executed bool has_subtrees_to_compute = true; @@ -492,11 +491,11 @@ static void triang_interleaved2(NrnThread* nt, int icore, int ncycle, int* strid // clang-format off nrn_pragma_acc(loop seq) for (; has_subtrees_to_compute; ) { // ncycle loop -#ifndef CORENEURON_ENABLE_GPU // serial test, gpu does this in parallel + nrn_pragma_acc(loop vector) + nrn_pragma_omp(loop bind(parallel)) for (int icore = 0; icore < warpsize; ++icore) { int i = ii + icore; -#endif if (icore < istride) { // most efficient if istride equal warpsize // what is the index int ip = GPU_PARENT(i); @@ -508,9 +507,7 @@ static void triang_interleaved2(NrnThread* nt, int icore, int ncycle, int* strid nrn_pragma_omp(atomic update) GPU_RHS(ip) -= p * GPU_RHS(i); } -#ifndef CORENEURON_ENABLE_GPU } -#endif // if finished with all tree depths then ready to break // (note that break is not allowed 
in OpenACC) if (icycle == 0) { @@ -520,14 +517,12 @@ static void triang_interleaved2(NrnThread* nt, int icore, int ncycle, int* strid --icycle; istride = stride[icycle]; i -= istride; -#ifndef CORENEURON_ENABLE_GPU ii -= istride; -#endif } - // clang-format on } // icore ranges [0:warpsize) ; stride[ncycle] +nrn_pragma_acc(routine vector) static void bksub_interleaved2(NrnThread* nt, int root, int lastroot, @@ -535,36 +530,29 @@ static void bksub_interleaved2(NrnThread* nt, int ncycle, int* stride, int firstnode) { -#ifndef CORENEURON_ENABLE_GPU - for (int i = root; i < lastroot; i += 1) { -#else nrn_pragma_acc(loop seq) - for (int i = root; i < lastroot; i += warpsize) { -#endif + for (int i = root; i < lastroot; i += 1) { GPU_RHS(i) /= GPU_D(i); // the root } int i = firstnode + icore; -#ifndef CORENEURON_ENABLE_GPU int ii = i; -#endif + nrn_pragma_acc(loop seq) for (int icycle = 0; icycle < ncycle; ++icycle) { int istride = stride[icycle]; -#ifndef CORENEURON_ENABLE_GPU // serial test, gpu does this in parallel + nrn_pragma_acc(loop vector) + nrn_pragma_omp(loop bind(parallel)) for (int icore = 0; icore < warpsize; ++icore) { int i = ii + icore; -#endif if (icore < istride) { int ip = GPU_PARENT(i); GPU_RHS(i) -= GPU_B(i) * GPU_RHS(ip); GPU_RHS(i) /= GPU_D(i); } i += istride; -#ifndef CORENEURON_ENABLE_GPU } ii += istride; -#endif } } @@ -600,15 +588,24 @@ void solve_interleaved2(int ith) { defined(_OPENACC) int nstride = stridedispl[nwarp]; #endif - nrn_pragma_acc(parallel loop gang vector vector_length( - warpsize) present(nt [0:1], + /* If we compare this loop with the one from cellorder.cu (CUDA version), we will understand + * that the parallelism here is exposed in steps, while in the CUDA version all the parallelism + * is exposed from the very beginning of the loop. In more details, here we initially distribute + * the outermost loop, e.g. 
in the CUDA blocks, and for the innermost loops we explicitly use multiple + * threads for the parallelization (see for example the loop directives in triang/bksub_interleaved2). + * On the other hand, in the CUDA version the outermost loop is distributed to all the available threads, + * and therefore there is no need to have the innermost loops. Here, the loop/icore jumps every warpsize, + * while in the CUDA version the icore increases by one. Other than this, the two loop versions + * are equivalent (same results). + */ + nrn_pragma_acc(parallel loop gang present(nt [0:1], strides [0:nstride], ncycles [0:nwarp], stridedispl [0:nwarp + 1], rootbegin [0:nwarp + 1], nodebegin [0:nwarp + 1]) if (nt->compute_gpu) async(nt->stream_id)) - nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu)) - for (int icore = 0; icore < ncore; ++icore) { + nrn_pragma_omp(target teams loop if(nt->compute_gpu)) + for (int icore = 0; icore < ncore; icore += warpsize) { int iwarp = icore / warpsize; // figure out the >> value int ic = icore & (warpsize - 1); // figure out the & mask int ncycle = ncycles[iwarp]; @@ -617,14 +614,9 @@ void solve_interleaved2(int ith) { int lastroot = rootbegin[iwarp + 1]; int firstnode = nodebegin[iwarp]; int lastnode = nodebegin[iwarp + 1]; -#ifndef CORENEURON_ENABLE_GPU - if (ic == 0) { // serial test mode. 
triang and bksub do all cores in warp -#endif - triang_interleaved2(nt, ic, ncycle, stride, lastnode); - bksub_interleaved2(nt, root + ic, lastroot, ic, ncycle, stride, firstnode); -#ifndef CORENEURON_ENABLE_GPU - } // serial test mode -#endif + + triang_interleaved2(nt, ic, ncycle, stride, lastnode); + bksub_interleaved2(nt, root + ic, lastroot, ic, ncycle, stride, firstnode); } nrn_pragma_acc(wait(nt->stream_id)) #ifdef _OPENACC diff --git a/tests/unit/solver/test_solver.cpp b/tests/unit/solver/test_solver.cpp index 4c47913ba..6511f03e1 100644 --- a/tests/unit/solver/test_solver.cpp +++ b/tests/unit/solver/test_solver.cpp @@ -33,12 +33,12 @@ struct SolverData { constexpr auto magic_index_value = -2; constexpr auto magic_double_value = std::numeric_limits::lowest(); -// TODO: check out adding a CellPermute2_CPU version? enum struct SolverImplementation { CellPermute0_CPU, CellPermute0_GPU, CellPermute1_CPU, CellPermute1_GPU, + CellPermute2_CPU, CellPermute2_GPU, CellPermute2_CUDA }; @@ -52,6 +52,8 @@ std::ostream& operator<<(std::ostream& os, SolverImplementation impl) { return os << "SolverImplementation::CellPermute1_CPU"; } else if (impl == SolverImplementation::CellPermute1_GPU) { return os << "SolverImplementation::CellPermute1_GPU"; + } else if (impl == SolverImplementation::CellPermute2_CPU) { + return os << "SolverImplementation::CellPermute2_CPU"; } else if (impl == SolverImplementation::CellPermute2_GPU) { return os << "SolverImplementation::CellPermute2_GPU"; } else if (impl == SolverImplementation::CellPermute2_CUDA) { @@ -91,8 +93,11 @@ struct SetupThreads { break; case SolverImplementation::CellPermute2_CUDA: corenrn_param.cuda_interface = true; + [[fallthrough]]; case SolverImplementation::CellPermute2_GPU: corenrn_param.gpu = true; + [[fallthrough]]; + case SolverImplementation::CellPermute2_CPU: interleave_permute_type = 2; break; } @@ -259,9 +264,10 @@ auto solve_and_dump(Args&&... 
args) { } auto active_implementations() { - // These two are always available + // These are always available std::vector ret{SolverImplementation::CellPermute0_CPU, - SolverImplementation::CellPermute1_CPU}; + SolverImplementation::CellPermute1_CPU, + SolverImplementation::CellPermute2_CPU}; #ifdef CORENEURON_ENABLE_GPU // Consider making these steerable via a runtime switch in GPU builds ret.push_back(SolverImplementation::CellPermute0_GPU); From d6507e20ffdcf652b07e231347174f8f9c89e32a Mon Sep 17 00:00:00 2001 From: Nicolas Cornu Date: Thu, 25 Aug 2022 07:53:07 +0200 Subject: [PATCH 2/9] Use last hpc-coding-conventions formatter script (#850) Co-authored-by: Tristan Carel --- .bbp-project.yaml | 8 +++++++ .../workflows/clang_cmake_format_check.yaml | 23 ++++--------------- .gitignore | 1 + CMake/hpc-coding-conventions | 2 +- CMake/packages/FindSphinx.cmake | 11 ++++----- CMake/packages/Findlikwid.cmake | 5 ++-- CMake/packages/Findnmodl.cmake | 13 +++++++---- CMake/packages/Findreportinglib.cmake | 8 ++++--- CMakeLists.txt | 11 --------- 9 files changed, 36 insertions(+), 46 deletions(-) create mode 100644 .bbp-project.yaml diff --git a/.bbp-project.yaml b/.bbp-project.yaml new file mode 100644 index 000000000..d523b219f --- /dev/null +++ b/.bbp-project.yaml @@ -0,0 +1,8 @@ +tools: + ClangFormat: + enable: True + include: + match: + - coreneuron/.*\.((cu)|(h)|([chi]pp))$ + CMakeFormat: + enable: True diff --git a/.github/workflows/clang_cmake_format_check.yaml b/.github/workflows/clang_cmake_format_check.yaml index 8725fa9c1..1494941cc 100644 --- a/.github/workflows/clang_cmake_format_check.yaml +++ b/.github/workflows/clang_cmake_format_check.yaml @@ -14,24 +14,11 @@ jobs: steps: - name: Fetch repository uses: actions/checkout@v3 - - name: Install clang-format 11 - run: | - sudo apt-get update - sudo apt-get install clang-format-11 python3-pip libboost-all-dev libopenmpi-dev openmpi-bin - - name: Install cmake-format 0.6.13 - run: python3 -m pip install 
cmake-format==0.6.13 - - name: Configure + - name: Fetch hpc-coding-conventions submodules shell: bash working-directory: ${{runner.workspace}}/CoreNeuron - run: | - export PATH=/home/runner/.local/bin:$PATH - mkdir BUILD && cd BUILD - cmake -DCORENRN_CLANG_FORMAT=ON -DCORENRN_CMAKE_FORMAT=ON -DCORENRN_ENABLE_MPI=ON -DCORENRN_ENABLE_OPENMP=OFF -DClangFormat_EXECUTABLE=$(which clang-format-11) -DCMakeFormat_EXECUTABLE=$(which cmake-format) .. - - name: Run clang-format + run: git submodule update --init --depth 1 -- CMake/hpc-coding-conventions + - name: Run clang-format and cmake-format shell: bash - working-directory: ${{runner.workspace}}/CoreNeuron/BUILD - run: make check-clang-format VERBOSE=1 - - name: Run cmake-format - shell: bash - working-directory: ${{runner.workspace}}/CoreNeuron/BUILD - run: make check-cmake-format + working-directory: ${{runner.workspace}}/CoreNeuron + run: CMake/hpc-coding-conventions/bin/format -v --dry-run diff --git a/.gitignore b/.gitignore index e6c1ba624..e0813fb15 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ spconfig.* .clang-tidy .cmake-format.yaml .pre-commit-config.yaml +.bbp-project-venv/ diff --git a/CMake/hpc-coding-conventions b/CMake/hpc-coding-conventions index 5d4bcd2d4..939463f17 160000 --- a/CMake/hpc-coding-conventions +++ b/CMake/hpc-coding-conventions @@ -1 +1 @@ -Subproject commit 5d4bcd2d410e67bdc1d23d3280c08ee5c9df943b +Subproject commit 939463f1722352a376a2474ad33c75e59c0c388c diff --git a/CMake/packages/FindSphinx.cmake b/CMake/packages/FindSphinx.cmake index 2ffc285bf..f0701cd44 100644 --- a/CMake/packages/FindSphinx.cmake +++ b/CMake/packages/FindSphinx.cmake @@ -4,12 +4,11 @@ # See top-level LICENSE file for details. 
# ============================================================================= -find_program(SPHINX_EXECUTABLE - NAMES sphinx-build - DOC "/path/to/sphinx-build") +find_program( + SPHINX_EXECUTABLE + NAMES sphinx-build + DOC "/path/to/sphinx-build") include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(Sphinx - "Failed to find sphinx-build executable" - SPHINX_EXECUTABLE) \ No newline at end of file +find_package_handle_standard_args(Sphinx "Failed to find sphinx-build executable" SPHINX_EXECUTABLE) diff --git a/CMake/packages/Findlikwid.cmake b/CMake/packages/Findlikwid.cmake index 28f3da748..c2b4aa0ba 100644 --- a/CMake/packages/Findlikwid.cmake +++ b/CMake/packages/Findlikwid.cmake @@ -4,6 +4,7 @@ # See top-level LICENSE file for details. # ============================================================================= +# ~~~ # Findlikwid # ------------- # @@ -26,6 +27,7 @@ # likwid_FOUND - set to true if the library is found # likwid_INCLUDE - list of required include directories # likwid_LIBRARIES - list of required library directories +# ~~~ find_path(likwid_INCLUDE_DIRS "likwid.h" HINTS "${LIKWID_DIR}/include") find_library(likwid_LIBRARIES likwid HINTS "${LIKWID_DIR}/lib") @@ -33,5 +35,4 @@ find_library(likwid_LIBRARIES likwid HINTS "${LIKWID_DIR}/lib") # Checks 'REQUIRED', 'QUIET' and versions. include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(likwid - REQUIRED_VARS likwid_INCLUDE_DIRS likwid_LIBRARIES) +find_package_handle_standard_args(likwid REQUIRED_VARS likwid_INCLUDE_DIRS likwid_LIBRARIES) diff --git a/CMake/packages/Findnmodl.cmake b/CMake/packages/Findnmodl.cmake index b7b73fdbb..b76a3c2ec 100644 --- a/CMake/packages/Findnmodl.cmake +++ b/CMake/packages/Findnmodl.cmake @@ -4,6 +4,7 @@ # See top-level LICENSE file for details. 
# ============================================================================= +# ~~~ # Findnmodl # ------------- # @@ -26,11 +27,13 @@ # nmodl_FOUND - set to true if the library is found # nmodl_INCLUDE - list of required include directories # nmodl_BINARY - the nmodl binary - +# ~~~ # UNIX paths are standard, no need to write. -find_program(nmodl_BINARY NAMES nmodl${CMAKE_EXECUTABLE_SUFFIX} - HINTS "${CORENRN_NMODL_DIR}/bin" QUIET) +find_program( + nmodl_BINARY + NAMES nmodl${CMAKE_EXECUTABLE_SUFFIX} + HINTS "${CORENRN_NMODL_DIR}/bin" QUIET) find_path(nmodl_INCLUDE "nmodl/fast_math.ispc" HINTS "${CORENRN_NMODL_DIR}/include") find_path(nmodl_PYTHONPATH "nmodl/__init__.py" HINTS "${CORENRN_NMODL_DIR}/lib") @@ -38,7 +41,7 @@ find_path(nmodl_PYTHONPATH "nmodl/__init__.py" HINTS "${CORENRN_NMODL_DIR}/lib") # Checks 'REQUIRED', 'QUIET' and versions. include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(nmodl +find_package_handle_standard_args( + nmodl FOUND_VAR nmodl_FOUND REQUIRED_VARS nmodl_BINARY nmodl_INCLUDE nmodl_PYTHONPATH) - diff --git a/CMake/packages/Findreportinglib.cmake b/CMake/packages/Findreportinglib.cmake index 9c457327f..6755d57fd 100644 --- a/CMake/packages/Findreportinglib.cmake +++ b/CMake/packages/Findreportinglib.cmake @@ -4,6 +4,7 @@ # See top-level LICENSE file for details. # ============================================================================= +# ~~~ # Findreportinglib # ------------- # @@ -26,17 +27,18 @@ # reportinglib_FOUND - set to true if the library is found # reportinglib_INCLUDE_DIRS - list of required include directories # reportinglib_LIBRARIES - list of libraries to be linked +# ~~~ # UNIX paths are standard, no need to write. 
find_path(reportinglib_INCLUDE_DIR reportinglib/Report.h) find_library(reportinglib_LIBRARY reportinglib) get_filename_component(reportinglib_LIB_DIR ${reportinglib_LIBRARY} DIRECTORY) -find_program (reportinglib_somaDump somaDump ${reportinglib_LIB_DIR}/../bin) +find_program(reportinglib_somaDump somaDump ${reportinglib_LIB_DIR}/../bin) # Checks 'REQUIRED', 'QUIET' and versions. include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(reportinglib +find_package_handle_standard_args( + reportinglib FOUND_VAR reportinglib_FOUND REQUIRED_VARS reportinglib_INCLUDE_DIR reportinglib_LIBRARY reportinglib_LIB_DIR) - diff --git a/CMakeLists.txt b/CMakeLists.txt index cb1c96b6c..eda8f2feb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -85,17 +85,6 @@ list(APPEND CMAKE_MODULE_PATH ${CORENEURON_PROJECT_SOURCE_DIR}/CMake # ============================================================================= set(CODING_CONV_PREFIX "CORENRN") set(CORENRN_3RDPARTY_DIR "external") -# Adds .cu with respect to the current default in hpc-coding-conventions, and drops various patterns -# that don't match anything in CoreNEURON. 
-set(CORENRN_ClangFormat_FILES_RE - "^.*\\\\.cu$$" "^.*\\\\.h$$" "^.*\\\\.[chi]pp$$" - CACHE STRING "List of regular expressions matching C/C++ filenames" FORCE) -set(CORENRN_ClangFormat_EXCLUDES_RE - "" - CACHE STRING "list of regular expressions to exclude C/C++ files from formatting" FORCE) -set(CORENRN_CMakeFormat_EXCLUDES_RE - "CMake/packages/.*$$" - CACHE STRING "list of regular expressions to exclude CMake files from formatting" FORCE) include(AddHpcCodingConvSubmodule) add_subdirectory(CMake/hpc-coding-conventions/cpp) From 12272f80343871a79b5c8a8a42c0096ce7f42edc Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Sun, 28 Aug 2022 18:13:24 +0200 Subject: [PATCH 3/9] Support for shared libraries in GPU execution (python launch support) (#795) * coreneuron and mechanism library can be built as shared and it enables launching coreneuron on GPU via python * update MOD2C and NMODL fixes to handle GLOBAL variables See BlueBrain/mod2c/pull/78 See BlueBrain/nmodl/pull/904 * removed acc/openmp global annotations for celsius, pi and secondorder and they don't need to be copied on GPU * Pass Memb_list* as an argument for all common prototypes in order to support global variables via argument * free ml->instance if not empty * add link to libscopmath in neuron as well * nrn_ghk is now declared inline. * homegrown present table to avoid dynamic loading + acc_deviceptr limitations * use -gpu=nordc and make #pragma acc routine seq functions inline * drop -lscopmath as its folded in elsewhere * random123 header reorganisation * try and cleanup CLI11 handling. * try and consolidate build logic * some CORENEURON_ -> CORENRN_ for consistency. * export OpenACC flags to NEURON separately as well as part of the whole ... -lcoreneuron ... link line. * libcoreneuron.so -> libcorenrnmech.so, try and fix static builds * do not enable OpenMP in shared/OpenACC builds. * add rpaths inside nrnivmodl-core. 
* accept a private destructor function pointer from generated mechanisms * drop ${TEST_EXEC_PREFIX} that was causing simple tests to be executed on many ranks. * CORENEURON_GPU_DEBUG: add environment variable that enables cnrn_target_* debug messages. fixes #141 Co-authored-by: Olli Lupton --- .gitlab-ci.yml | 200 ++++++++++------ CMake/MakefileBuildOptions.cmake | 143 +++++++++--- CMake/OpenAccHelper.cmake | 44 ++-- CMake/coreneuron-config.cmake.in | 6 +- CMakeLists.txt | 100 +++----- coreneuron/CMakeLists.txt | 218 +++++++++--------- coreneuron/apps/corenrn_parameters.cpp | 21 +- coreneuron/apps/corenrn_parameters.hpp | 36 ++- coreneuron/apps/main1.cpp | 26 ++- coreneuron/gpu/nrn_acc_manager.cpp | 215 ++++++++++++++++- coreneuron/gpu/nrn_acc_manager.hpp | 13 +- coreneuron/io/core2nrn_data_return.cpp | 16 +- coreneuron/io/global_vars.cpp | 2 +- coreneuron/io/nrn2core_data_init.cpp | 3 +- coreneuron/io/nrn_checkpoint.cpp | 6 +- coreneuron/io/nrn_setup.cpp | 8 + coreneuron/io/phase2.cpp | 6 +- coreneuron/mechanism/capac.cpp | 10 +- coreneuron/mechanism/eion.cpp | 76 +----- .../mechanism/mech/mod2c_core_thread.hpp | 8 +- coreneuron/mechanism/mechanism.hpp | 8 +- coreneuron/mechanism/membfunc.hpp | 84 +++++-- coreneuron/mechanism/nrnoc_ml.ispc | 4 +- coreneuron/mechanism/patternstim.cpp | 15 +- coreneuron/mechanism/register_mech.cpp | 22 +- coreneuron/mechanism/register_mech.hpp | 2 +- .../mpi/core/{nrnmpi.cpp => resolve.cpp} | 0 coreneuron/mpi/nrnmpi.h | 2 +- coreneuron/network/cvodestb.cpp | 12 - coreneuron/network/partrans.cpp | 19 +- coreneuron/nrnconf.h | 8 - coreneuron/nrnoc/nrnunits_modern.h | 36 --- coreneuron/sim/fast_imem.cpp | 8 +- coreneuron/sim/multicore.cpp | 9 +- coreneuron/sim/multicore.hpp | 15 +- coreneuron/sim/scopmath/errcodes.h | 39 +++- coreneuron/sim/scopmath/sparse_thread.hpp | 38 +-- coreneuron/utils/ispc/globals.cpp | 17 -- coreneuron/utils/memory.cpp | 2 +- coreneuron/utils/memory.h | 12 +- coreneuron/utils/nrnoc_aux.hpp | 5 - 
coreneuron/utils/offload.hpp | 135 +++++++++-- coreneuron/utils/randoms/nrnran123.cpp | 152 +++++------- coreneuron/utils/randoms/nrnran123.h | 113 ++++++--- coreneuron/utils/units.hpp | 38 +++ external/mod2c | 2 +- external/nmodl | 2 +- extra/nrnivmodl_core_makefile.in | 40 ++-- tests/CMakeLists.txt | 6 +- tests/integration/CMakeLists.txt | 55 ++--- tests/unit/alignment/CMakeLists.txt | 6 +- tests/unit/cmdline_interface/CMakeLists.txt | 12 +- .../test_cmdline_interface.cpp | 2 +- tests/unit/interleave_info/CMakeLists.txt | 11 +- tests/unit/lfp/CMakeLists.txt | 16 +- tests/unit/queueing/CMakeLists.txt | 10 +- tests/unit/solver/CMakeLists.txt | 14 +- tests/unit/solver/test_solver.cpp | 4 +- 58 files changed, 1287 insertions(+), 845 deletions(-) rename coreneuron/mpi/core/{nrnmpi.cpp => resolve.cpp} (100%) delete mode 100644 coreneuron/nrnoc/nrnunits_modern.h delete mode 100644 coreneuron/utils/ispc/globals.cpp create mode 100644 coreneuron/utils/units.hpp diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 737e867f0..b8fad911f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -81,24 +81,10 @@ build:nmodl: SPACK_PACKAGE_COMPILER: gcc # Build CoreNEURON -build:coreneuron:mod2c:nvhpc:acc: - extends: [.build, .spack_nvhpc] - variables: - SPACK_PACKAGE: coreneuron - # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type - SPACK_PACKAGE_SPEC: +gpu+openmp+tests~legacy-unit build_type=RelWithDebInfo - -# Build CoreNEURON with Unified Memory on GPU -build:coreneuron:mod2c:nvhpc:acc:unified: - extends: [.build, .spack_nvhpc] - variables: - SPACK_PACKAGE: coreneuron - # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type - SPACK_PACKAGE_SPEC: +gpu+unified+openmp+tests~legacy-unit build_type=RelWithDebInfo - -.build_coreneuron_nmodl: +.build_coreneuron: extends: [.build] variables: + SPACK_PACKAGE: coreneuron # NEURON depends on py-mpi4py, most of whose dependencies are pulled in by # nmodl%gcc, with the exception 
of MPI, which is pulled in by # coreneuron%{nvhpc,intel}. hpe-mpi is an external package anyway, so @@ -106,102 +92,168 @@ build:coreneuron:mod2c:nvhpc:acc:unified: # dependency graph and not changing which installation is used, but this # means that in the NEURON step an existing py-mpi4py%gcc can be used. # Otherwise a new py-mpi4py with hpe-mpi%{nvhpc,intel} will be built. + # caliper: papi%nvhpc does not build; use the caliper from the deployment # TODO: fix this more robustly so we don't have to play so many games. - SPACK_PACKAGE_DEPENDENCIES: ^hpe-mpi%gcc + SPACK_PACKAGE_DEPENDENCIES: ^hpe-mpi%gcc ^caliper%gcc+cuda cuda_arch=70 -build:coreneuron:nmodl:nvhpc:omp: - extends: [.build_coreneuron_nmodl, .spack_nvhpc] +# TODO: improve coverage by switching an Intel build to be statically linked +# TODO: improve coverage by switching an Intel build to RelWithDebInfo +# TODO: improve coverage by enabling +openmp on an Intel build +build:coreneuron:mod2c:intel:shared:debug: + extends: [.build_coreneuron, .spack_intel] variables: - SPACK_PACKAGE: coreneuron - # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type - SPACK_PACKAGE_SPEC: +nmodl+openmp+gpu+tests~legacy-unit~sympy build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit~nmodl~openmp+shared+tests~unified build_type=Debug + +build:coreneuron:nmodl:intel:debug:legacy: + extends: [.build_coreneuron, .spack_intel] needs: ["build:nmodl"] + variables: + SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit+nmodl~openmp~shared~sympy+tests~unified build_type=Debug -build:coreneuron:nmodl:nvhpc:acc: - extends: [.build_coreneuron_nmodl, .spack_nvhpc] +# Disable caliper to improve coverage +build:coreneuron:nmodl:intel:shared:debug: + extends: [.build_coreneuron, .spack_intel] + needs: ["build:nmodl"] variables: - SPACK_PACKAGE: coreneuron - # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type - # Sympy + OpenMP target offload does not currently 
work with NVHPC - SPACK_PACKAGE_SPEC: +nmodl~openmp+gpu+tests~legacy-unit+sympy build_type=RelWithDebInfo + SPACK_PACKAGE_DEPENDENCIES: ^hpe-mpi%gcc + SPACK_PACKAGE_SPEC: ~caliper~gpu~legacy-unit+nmodl~openmp+shared+sympy+tests~unified build_type=Debug + +# Not linked to a NEURON build+test job, see +# https://github.com/BlueBrain/CoreNeuron/issues/594 +build:coreneuron:mod2c:nvhpc:acc:debug:unified: + extends: [.build_coreneuron, .spack_nvhpc] + variables: + SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit~nmodl+openmp~shared+tests+unified build_type=Debug + +# Shared + OpenACC + OpenMP host threading has problems +build:coreneuron:mod2c:nvhpc:acc:shared: + extends: [.build_coreneuron, .spack_nvhpc] + variables: + SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit~nmodl~openmp+shared+tests~unified build_type=RelWithDebInfo + +build:coreneuron:nmodl:nvhpc:acc:debug:legacy: + extends: [.build_coreneuron, .spack_nvhpc] needs: ["build:nmodl"] + variables: + SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl~openmp~shared~sympy+tests~unified build_type=Debug -build:coreneuron:mod2c:intel: - extends: [.build, .spack_intel] +build:coreneuron:nmodl:nvhpc:acc:shared: + extends: [.build_coreneuron, .spack_nvhpc] + needs: ["build:nmodl"] variables: - SPACK_PACKAGE: coreneuron - SPACK_PACKAGE_SPEC: +tests~legacy-unit build_type=Debug + SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl~openmp+shared+sympy+tests~unified build_type=RelWithDebInfo -build:coreneuron:nmodl:intel: - extends: [.build_coreneuron_nmodl, .spack_intel] +build:coreneuron:nmodl:nvhpc:omp:legacy: + extends: [.build_coreneuron, .spack_nvhpc] + needs: ["build:nmodl"] variables: - SPACK_PACKAGE: coreneuron - SPACK_PACKAGE_SPEC: +nmodl+tests~legacy-unit build_type=Debug + SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl+openmp~shared~sympy+tests~unified build_type=RelWithDebInfo + +build:coreneuron:nmodl:nvhpc:omp:debug: + extends: [.build_coreneuron, .spack_nvhpc] needs: ["build:nmodl"] + variables: + 
SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl+openmp~shared+sympy+tests~unified build_type=Debug # Build NEURON -build:neuron:mod2c:nvhpc:acc: +build:neuron:mod2c:intel:shared:debug: + extends: [.build_neuron, .spack_intel] + needs: ["build:coreneuron:mod2c:intel:shared:debug"] + +build:neuron:nmodl:intel:debug:legacy: + extends: [.build_neuron, .spack_intel] + needs: ["build:coreneuron:nmodl:intel:debug:legacy"] + +build:neuron:nmodl:intel:shared:debug: + extends: [.build_neuron, .spack_intel] + needs: ["build:coreneuron:nmodl:intel:shared:debug"] + +build:neuron:mod2c:nvhpc:acc:shared: extends: [.build_neuron, .spack_nvhpc] - needs: ["build:coreneuron:mod2c:nvhpc:acc"] + needs: ["build:coreneuron:mod2c:nvhpc:acc:shared"] -build:neuron:nmodl:nvhpc:omp: +build:neuron:nmodl:nvhpc:acc:debug:legacy: extends: [.build_neuron, .spack_nvhpc] - needs: ["build:coreneuron:nmodl:nvhpc:omp"] + needs: ["build:coreneuron:nmodl:nvhpc:acc:debug:legacy"] -build:neuron:nmodl:nvhpc:acc: +build:neuron:nmodl:nvhpc:acc:shared: extends: [.build_neuron, .spack_nvhpc] - needs: ["build:coreneuron:nmodl:nvhpc:acc"] + needs: ["build:coreneuron:nmodl:nvhpc:acc:shared"] -build:neuron:mod2c:intel: - extends: [.build_neuron, .spack_intel] - needs: ["build:coreneuron:mod2c:intel"] +build:neuron:nmodl:nvhpc:omp:legacy: + extends: [.build_neuron, .spack_nvhpc] + needs: ["build:coreneuron:nmodl:nvhpc:omp:legacy"] -build:neuron:nmodl:intel: - extends: [.build_neuron, .spack_intel] - needs: ["build:coreneuron:nmodl:intel"] +build:neuron:nmodl:nvhpc:omp:debug: + extends: [.build_neuron, .spack_nvhpc] + needs: ["build:coreneuron:nmodl:nvhpc:omp:debug"] # Test CoreNEURON -test:coreneuron:mod2c:nvhpc:acc: +test:coreneuron:mod2c:intel:shared:debug: + extends: [.ctest] + needs: ["build:coreneuron:mod2c:intel:shared:debug"] + +test:coreneuron:nmodl:intel:debug:legacy: + extends: [.ctest] + needs: ["build:coreneuron:nmodl:intel:debug:legacy"] + +test:coreneuron:nmodl:intel:shared:debug: + extends: 
[.ctest] + needs: ["build:coreneuron:nmodl:intel:shared:debug"] + +test:coreneuron:mod2c:nvhpc:acc:debug:unified: extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:mod2c:nvhpc:acc"] + needs: ["build:coreneuron:mod2c:nvhpc:acc:debug:unified"] -test:coreneuron:mod2c:nvhpc:acc:unified: +test:coreneuron:mod2c:nvhpc:acc:shared: extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:mod2c:nvhpc:acc:unified"] + needs: ["build:coreneuron:mod2c:nvhpc:acc:shared"] -test:coreneuron:nmodl:nvhpc:omp: +test:coreneuron:nmodl:nvhpc:acc:debug:legacy: extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:omp"] + needs: ["build:coreneuron:nmodl:nvhpc:acc:debug:legacy"] -test:coreneuron:nmodl:nvhpc:acc: +test:coreneuron:nmodl:nvhpc:acc:shared: extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:acc"] + needs: ["build:coreneuron:nmodl:nvhpc:acc:shared"] -test:coreneuron:mod2c:intel: - extends: [.ctest] - needs: ["build:coreneuron:mod2c:intel"] +test:coreneuron:nmodl:nvhpc:omp:legacy: + extends: [.ctest, .gpu_node] + needs: ["build:coreneuron:nmodl:nvhpc:omp:legacy"] -test:coreneuron:nmodl:intel: - extends: [.ctest] - needs: ["build:coreneuron:nmodl:intel"] +test:coreneuron:nmodl:nvhpc:omp:debug: + extends: [.ctest, .gpu_node] + needs: ["build:coreneuron:nmodl:nvhpc:omp:debug"] # Test NEURON -test:neuron:mod2c:nvhpc:acc: +test:neuron:mod2c:intel:shared:debug: + extends: [.test_neuron] + needs: ["build:neuron:mod2c:intel:shared:debug"] + +test:neuron:nmodl:intel:debug:legacy: + extends: [.test_neuron] + needs: ["build:neuron:nmodl:intel:debug:legacy"] + +test:neuron:nmodl:intel:shared:debug: + extends: [.test_neuron] + needs: ["build:neuron:nmodl:intel:shared:debug"] + +test:neuron:mod2c:nvhpc:acc:shared: extends: [.test_neuron, .gpu_node] - needs: ["build:neuron:mod2c:nvhpc:acc"] + needs: ["build:neuron:mod2c:nvhpc:acc:shared"] -test:neuron:nmodl:nvhpc:omp: +test:neuron:nmodl:nvhpc:acc:debug:legacy: extends: [.test_neuron, .gpu_node] - needs: 
["build:neuron:nmodl:nvhpc:omp"] + needs: ["build:neuron:nmodl:nvhpc:acc:debug:legacy"] -test:neuron:nmodl:nvhpc:acc: +test:neuron:nmodl:nvhpc:acc:shared: extends: [.test_neuron, .gpu_node] - needs: ["build:neuron:nmodl:nvhpc:acc"] + needs: ["build:neuron:nmodl:nvhpc:acc:shared"] -test:neuron:mod2c:intel: - extends: [.test_neuron] - needs: ["build:neuron:mod2c:intel"] +test:neuron:nmodl:nvhpc:omp:legacy: + extends: [.test_neuron, .gpu_node] + needs: ["build:neuron:nmodl:nvhpc:omp:legacy"] -test:neuron:nmodl:intel: - extends: [.test_neuron] - needs: ["build:neuron:nmodl:intel"] +test:neuron:nmodl:nvhpc:omp:debug: + extends: [.test_neuron, .gpu_node] + needs: ["build:neuron:nmodl:nvhpc:omp:debug"] diff --git a/CMake/MakefileBuildOptions.cmake b/CMake/MakefileBuildOptions.cmake index 7aef0c549..e4c658349 100644 --- a/CMake/MakefileBuildOptions.cmake +++ b/CMake/MakefileBuildOptions.cmake @@ -21,7 +21,7 @@ set(CMAKE_ISPC_FLAGS "${CMAKE_ISPC_FLAGS} --pic") set(NMODL_COMMON_ARGS "passes --inline") if(NOT "${CORENRN_NMODL_FLAGS}" STREQUAL "") - set(NMODL_COMMON_ARGS "${NMODL_COMMON_ARGS} ${CORENRN_NMODL_FLAGS}") + string(APPEND NMODL_COMMON_ARGS " ${CORENRN_NMODL_FLAGS}") endif() set(NMODL_CPU_BACKEND_ARGS "host --c") @@ -29,66 +29,135 @@ set(NMODL_ISPC_BACKEND_ARGS "host --ispc") set(NMODL_ACC_BACKEND_ARGS "host --c acc --oacc") # ============================================================================= -# Extract Compile definitions : common to all backend +# Construct the linker arguments that are used inside nrnivmodl-core (to build libcorenrnmech from +# libcoreneuron-core, libcoreneuron-cuda and mechanism object files) and inside nrnivmodl (to link +# NEURON's special against CoreNEURON's libcorenrnmech). These are stored in two global properties: +# CORENRN_LIB_LINK_FLAGS (used by NEURON/nrnivmodl to link special against CoreNEURON) and +# CORENRN_LIB_LINK_DEP_FLAGS (used by CoreNEURON/nrnivmodl-core to link libcorenrnmech.so). 
+# Conceptually: CORENRN_LIB_LINK_FLAGS = -lcorenrnmech $CORENRN_LIB_LINK_DEP_FLAGS # ============================================================================= -get_directory_property(COMPILE_DEFS COMPILE_DEFINITIONS) -if(COMPILE_DEFS) - set(CORENRN_COMMON_COMPILE_DEFS "") - foreach(flag ${COMPILE_DEFS}) - set(CORENRN_COMMON_COMPILE_DEFS "${CORENRN_COMMON_COMPILE_DEFS} -D${flag}") - endforeach() +if(NOT CORENRN_ENABLE_SHARED) + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -Wl,--whole-archive") endif() +set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -lcorenrnmech") +if(NOT CORENRN_ENABLE_SHARED) + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -Wl,--no-whole-archive") +endif() +# Essentially we "just" want to unpack the CMake dependencies of the `coreneuron-core` target into a +# plain string that we can bake into the Makefiles in both NEURON and CoreNEURON. +function(coreneuron_process_library_path library) + get_filename_component(library_dir "${library}" DIRECTORY) + if(NOT library_dir) + # In case target is not a target but is just the name of a library, e.g. "dl" + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS " -l${library}") + elseif("${library_dir}" MATCHES "^(/lib|/lib64|/usr/lib|/usr/lib64)$") + # e.g. 
/usr/lib64/libpthread.so -> -lpthread TODO: consider using + # https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_IMPLICIT_LINK_DIRECTORIES.html, or + # dropping this special case entirely + get_filename_component(libname ${library} NAME_WE) + string(REGEX REPLACE "^lib" "" libname ${libname}) + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS " -l${libname}") + else() + # It's a full path, include that on the line + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS + " -Wl,-rpath,${library_dir} ${library}") + endif() +endfunction() +function(coreneuron_process_target target) + if(TARGET ${target}) + if(NOT target STREQUAL "coreneuron-core") + # This is a special case: libcoreneuron-core.a is manually unpacked into .o files by the + # nrnivmodl-core Makefile, so we do not want to also emit an -lcoreneuron-core argument. + get_target_property(target_inc_dirs ${target} INTERFACE_INCLUDE_DIRECTORIES) + if(target_inc_dirs) + foreach(inc_dir_genex ${target_inc_dirs}) + string(GENEX_STRIP "${inc_dir_genex}" inc_dir) + if(inc_dir) + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_EXTRA_COMPILE_FLAGS " -I${inc_dir}") + endif() + endforeach() + endif() + get_target_property(target_imported ${target} IMPORTED) + if(target_imported) + # In this case we can extract the full path to the library + get_target_property(target_location ${target} LOCATION) + coreneuron_process_library_path(${target_location}) + else() + # This is probably another of our libraries, like -lcoreneuron-cuda. We might need to add -L + # and an RPATH later. 
+ set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS " -l${target}") + endif() + endif() + get_target_property(target_libraries ${target} LINK_LIBRARIES) + if(target_libraries) + foreach(child_target ${target_libraries}) + coreneuron_process_target(${child_target}) + endforeach() + endif() + return() + endif() + coreneuron_process_library_path("${target}") +endfunction() +coreneuron_process_target(coreneuron-core) +get_property(CORENRN_LIB_LINK_DEP_FLAGS GLOBAL PROPERTY CORENRN_LIB_LINK_DEP_FLAGS) +set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " ${CORENRN_LIB_LINK_DEP_FLAGS}") +# In static builds then NEURON uses dlopen(nullptr, ...) to look for the corenrn_embedded_run +# symbol, which comes from libcoreneuron-core.a and gets included in libcorenrnmech. +if(NOT CORENRN_ENABLE_SHARED) + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -rdynamic") +endif() +get_property(CORENRN_EXTRA_COMPILE_FLAGS GLOBAL PROPERTY CORENRN_EXTRA_COMPILE_FLAGS) +get_property(CORENRN_LIB_LINK_FLAGS GLOBAL PROPERTY CORENRN_LIB_LINK_FLAGS) + +# Detect if --start-group and --end-group are valid linker arguments. These are typically needed +# when linking mutually-dependent .o files (or where we don't know the correct order) on Linux, but +# they are not needed *or* recognised by the macOS linker. 
+if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) + include(CheckLinkerFlag) + check_linker_flag(CXX -Wl,--start-group CORENRN_CXX_LINKER_SUPPORTS_START_GROUP) +elseif(CMAKE_SYSTEM_NAME MATCHES Linux) + # Assume that --start-group and --end-group are only supported on Linux + set(CORENRN_CXX_LINKER_SUPPORTS_START_GROUP ON) +endif() +if(CORENRN_CXX_LINKER_SUPPORTS_START_GROUP) + set(CORENEURON_LINKER_START_GROUP -Wl,--start-group) + set(CORENEURON_LINKER_END_GROUP -Wl,--end-group) +endif() + +# Things that used to be in CORENRN_LIB_LINK_FLAGS: -lrt -L${CMAKE_HOST_SYSTEM_PROCESSOR} +# -L${caliper_LIB_DIR} -l${CALIPER_LIB} # ============================================================================= -# link flags : common to all backend +# Turn CORENRN_COMPILE_DEFS into a list of -DFOO[=BAR] options. # ============================================================================= -# ~~~ -# find_cuda uses FindThreads that adds below imported target we -# shouldn't add imported target to link line -# ~~~ -list(REMOVE_ITEM CORENRN_LINK_LIBS "Threads::Threads") +list(TRANSFORM CORENRN_COMPILE_DEFS PREPEND -D OUTPUT_VARIABLE CORENRN_COMPILE_DEF_FLAGS) -string(JOIN " " CORENRN_COMMON_LDFLAGS ${CORENRN_EXTRA_LINK_FLAGS}) +# ============================================================================= +# Extra link flags that we need to include when linking libcorenrnmech.{a,so} in CoreNEURON but that +# do not need to be passed to NEURON to use when linking nrniv/special (why?) 
+# ============================================================================= +string(JOIN " " CORENRN_COMMON_LDFLAGS ${CORENRN_LIB_LINK_DEP_FLAGS} ${CORENRN_EXTRA_LINK_FLAGS}) if(CORENRN_SANITIZER_LIBRARY_DIR) string(APPEND CORENRN_COMMON_LDFLAGS " -Wl,-rpath,${CORENRN_SANITIZER_LIBRARY_DIR}") endif() string(JOIN " " CORENRN_SANITIZER_ENABLE_ENVIRONMENT_STRING ${CORENRN_SANITIZER_ENABLE_ENVIRONMENT}) -# replicate CMake magic to transform system libs to -l -foreach(link_lib ${CORENRN_LINK_LIBS}) - if(${link_lib} MATCHES "\-l.*") - string(APPEND CORENRN_COMMON_LDFLAGS " ${link_lib}") - continue() - endif() - get_filename_component(path ${link_lib} DIRECTORY) - if(NOT path) - string(APPEND CORENRN_COMMON_LDFLAGS " -l${link_lib}") - elseif("${path}" MATCHES "^(/lib|/lib64|/usr/lib|/usr/lib64)$") - get_filename_component(libname ${link_lib} NAME_WE) - string(REGEX REPLACE "^lib" "" libname ${libname}) - string(APPEND CORENRN_COMMON_LDFLAGS " -l${libname}") - else() - string(APPEND CORENRN_COMMON_LDFLAGS " ${link_lib}") - endif() -endforeach() - # ============================================================================= # compile flags : common to all backend # ============================================================================= -string(JOIN " " CMAKE_CXX17_STANDARD_COMPILE_OPTION_STRING ${CMAKE_CXX17_STANDARD_COMPILE_OPTION}) string(TOUPPER "${CMAKE_BUILD_TYPE}" _BUILD_TYPE) -list(TRANSFORM CORENRN_COMPILE_DEFS PREPEND -D OUTPUT_VARIABLE CORENRN_COMPILE_DEF_FLAGS) string( JOIN " " CORENRN_CXX_FLAGS ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${_BUILD_TYPE}} - ${CMAKE_CXX17_STANDARD_COMPILE_OPTION_STRING} + ${CMAKE_CXX17_STANDARD_COMPILE_OPTION} ${NVHPC_ACC_COMP_FLAGS} ${NVHPC_CXX_INLINE_FLAGS} ${CORENRN_COMPILE_DEF_FLAGS} - ${CORENRN_EXTRA_MECH_CXX_FLAGS}) + ${CORENRN_EXTRA_MECH_CXX_FLAGS} + ${CORENRN_EXTRA_COMPILE_FLAGS}) # ============================================================================= # nmodl/mod2c related options : TODO diff --git 
a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index 1c18225b6..a21f8b523 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -34,13 +34,13 @@ if(CORENRN_ENABLE_GPU) cnrn_parse_version(${CMAKE_CXX_COMPILER_VERSION} OUTPUT_MAJOR_MINOR CORENRN_NVHPC_MAJOR_MINOR_VERSION) # Enable cudaProfiler{Start,Stop}() behind the Instrumentor::phase... APIs - add_compile_definitions(CORENEURON_CUDA_PROFILING CORENEURON_ENABLE_GPU) + list(APPEND CORENRN_COMPILE_DEFS CORENEURON_CUDA_PROFILING CORENEURON_ENABLE_GPU) # Plain C++ code in CoreNEURON may need to use CUDA runtime APIs for, for example, starting and # stopping profiling. This makes sure those headers can be found. include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) # cuda unified memory support if(CORENRN_ENABLE_CUDA_UNIFIED_MEMORY) - add_compile_definitions(CORENEURON_UNIFIED_MEMORY) + list(APPEND CORENRN_COMPILE_DEFS CORENEURON_UNIFIED_MEMORY) endif() if(${CMAKE_VERSION} VERSION_LESS 3.17) # Hopefully we can drop this soon. Parse ${CMAKE_CUDA_COMPILER_VERSION} into a shorter X.Y @@ -67,7 +67,14 @@ if(CORENRN_ENABLE_GPU) # and offloaded OpenACC/OpenMP code. Using -cuda when compiling seems to improve error messages in # some cases, and to be recommended by NVIDIA. We pass -gpu=cudaX.Y to ensure that OpenACC/OpenMP # code is compiled with the same CUDA version as the explicit CUDA code. - set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo") + set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT}") + # Combining -gpu=lineinfo with -O0 -g gives a warning: Conflicting options --device-debug and + # --generate-line-info specified, ignoring --generate-line-info option + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + string(APPEND NVHPC_ACC_COMP_FLAGS ",debug") + else() + string(APPEND NVHPC_ACC_COMP_FLAGS ",lineinfo") + endif() # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA # code. 
Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the # same default compute capabilities as each other, particularly on GPU-less build machines. @@ -77,7 +84,7 @@ if(CORENRN_ENABLE_GPU) if(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenMP") # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available # for a region then prefer OpenMP. - add_compile_definitions(CORENEURON_PREFER_OPENMP_OFFLOAD) + list(APPEND CORENRN_COMPILE_DEFS CORENEURON_PREFER_OPENMP_OFFLOAD) string(APPEND NVHPC_ACC_COMP_FLAGS " -mp=gpu") elseif(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenACC") # Only enable OpenACC offload for GPU @@ -94,19 +101,26 @@ if(CORENRN_ENABLE_GPU) endif() # ============================================================================= -# Set global property that will be used by NEURON to link with CoreNEURON +# Initialise global properties that will be used by NEURON to link with CoreNEURON # ============================================================================= if(CORENRN_ENABLE_GPU) - set_property( - GLOBAL - PROPERTY - CORENEURON_LIB_LINK_FLAGS - "${NVHPC_ACC_COMP_FLAGS} -rdynamic -lrt -Wl,--whole-archive -L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcorenrnmech -L$(libdir) -lcoreneuron -Wl,--no-whole-archive" - ) -else() - set_property(GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS - "-L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcorenrnmech") -endif(CORENRN_ENABLE_GPU) + # CORENRN_LIB_LINK_FLAGS is the full set of flags needed to link against libcorenrnmech.so: + # something like `-acc -lcorenrnmech ...`. CORENRN_NEURON_LINK_FLAGS only contains flags that need + # to be used when linking the NEURON Python module to make sure it is able to dynamically load + # libcorenrnmech.so. 
+ set_property(GLOBAL PROPERTY CORENRN_LIB_LINK_FLAGS "${NVHPC_ACC_COMP_FLAGS}") + if(CORENRN_ENABLE_SHARED) + # Because of + # https://forums.developer.nvidia.com/t/dynamically-loading-an-openacc-enabled-shared-library-from-an-executable-compiled-with-nvc-does-not-work/210968 + # we have to tell NEURON to pass OpenACC flags when linking special, otherwise we end up with an + # `nrniv` binary that cannot dynamically load CoreNEURON in shared-library builds. + set_property(GLOBAL PROPERTY CORENRN_NEURON_LINK_FLAGS "${NVHPC_ACC_COMP_FLAGS}") + endif() +endif() + +# NEURON needs to have access to this when CoreNEURON is built as a submodule. If CoreNEURON is +# installed externally then this is set via coreneuron-config.cmake +set_property(GLOBAL PROPERTY CORENRN_ENABLE_SHARED ${CORENRN_ENABLE_SHARED}) if(CORENRN_HAVE_NVHPC_COMPILER) if(${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 20.7) diff --git a/CMake/coreneuron-config.cmake.in b/CMake/coreneuron-config.cmake.in index 29f67c92f..9f7ac4997 100644 --- a/CMake/coreneuron-config.cmake.in +++ b/CMake/coreneuron-config.cmake.in @@ -14,12 +14,14 @@ set(CORENRN_VERSION_PATCH @PROJECT_VERSION_PATCH@) set(CORENRN_ENABLE_GPU @CORENRN_ENABLE_GPU@) set(CORENRN_ENABLE_NMODL @CORENRN_ENABLE_NMODL@) set(CORENRN_ENABLE_REPORTING @CORENRN_ENABLE_REPORTING@) -set(CORENEURON_LIB_LINK_FLAGS "@CORENEURON_LIB_LINK_FLAGS@") +set(CORENRN_ENABLE_SHARED @CORENRN_ENABLE_SHARED@) +set(CORENRN_LIB_LINK_FLAGS "@CORENRN_LIB_LINK_FLAGS@") +set(CORENRN_NEURON_LINK_FLAGS "@CORENRN_NEURON_LINK_FLAGS@") find_path(CORENEURON_INCLUDE_DIR "coreneuron/coreneuron.h" HINTS "${CONFIG_PATH}/../../include") find_path( CORENEURON_LIB_DIR - NAMES libcoreneuron.a libcoreneuron.so libcoreneuron.dylib + NAMES libcorenrnmech.a libcorenrnmech.so libcorenrnmech.dylib HINTS "${CONFIG_PATH}/../../lib") include(${CONFIG_PATH}/coreneuron.cmake) diff --git a/CMakeLists.txt b/CMakeLists.txt index eda8f2feb..183639480 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt 
@@ -160,9 +160,6 @@ set(CORENRN_ACCELERATOR_OFFLOAD "Disabled") if(CORENRN_ENABLE_GPU) # Older CMake versions than 3.15 have not been tested for GPU/CUDA/OpenACC support after # https://github.com/BlueBrain/CoreNeuron/pull/609. - # https://cmake.org/cmake/help/latest/release/3.14.html#properties suggests there would be - # problems because of expressions like set_target_properties(lfp_test_bin PROPERTIES - # CUDA_RESOLVE_DEVICE_SYMBOLS OFF) # Fail hard and early if we don't have the PGI/NVHPC compiler. if(NOT CORENRN_HAVE_NVHPC_COMPILER) @@ -263,17 +260,21 @@ find_package(Perl REQUIRED) # Common build options # ============================================================================= # build mod files for coreneuron -add_definitions(-DCORENEURON_BUILD) - +list(APPEND CORENRN_COMPILE_DEFS CORENEURON_BUILD) set(CMAKE_REQUIRED_QUIET TRUE) check_include_files(malloc.h have_malloc_h) if(have_malloc_h) - add_definitions("-DHAVE_MALLOC_H") + list(APPEND CORENRN_COMPILE_DEFS HAVE_MALLOC_H) endif() # ============================================================================= # Build option specific compiler flags # ============================================================================= +if(CORENRN_ENABLE_NMODL) + # We use Eigen for "small" matrices with thread-level parallelism handled at a higher level; tell + # Eigen not to try to multithread internally + list(APPEND CORENRN_COMPILE_DEFS EIGEN_DONT_PARALLELIZE) +endif() if(CORENRN_HAVE_NVHPC_COMPILER) # PGI with llvm code generation doesn't have necessary assembly intrinsic headers list(APPEND CORENRN_COMPILE_DEFS EIGEN_DONT_VECTORIZE=1) @@ -292,14 +293,6 @@ if(CORENRN_HAVE_NVHPC_COMPILER) endif() endif() -# ~~~ -# OpenACC needs to build static library in order to have global/routines working. 
-# See https://www.pgroup.com/userforum/viewtopic.php?t=5350 -# ~~~ -if(CORENRN_ENABLE_GPU) - set(CORENRN_ENABLE_SHARED OFF) -endif() - if(CORENRN_ENABLE_SHARED) set(COMPILE_LIBRARY_TYPE "SHARED") else() @@ -313,14 +306,14 @@ endif() if(CORENRN_ENABLE_MPI) find_package(MPI REQUIRED) - add_definitions("-DNRNMPI=1") + list(APPEND CORENRN_COMPILE_DEFS NRNMPI=1) # avoid linking to C++ bindings - add_definitions("-DMPI_NO_CPPBIND=1") - add_definitions("-DOMPI_SKIP_MPICXX=1") - add_definitions("-DMPICH_SKIP_MPICXX=1") + list(APPEND CORENRN_COMPILE_DEFS MPI_NO_CPPBIND=1) + list(APPEND CORENRN_COMPILE_DEFS OMPI_SKIP_MPICXX=1) + list(APPEND CORENRN_COMPILE_DEFS MPICH_SKIP_MPICXX=1) else() - add_definitions("-DNRNMPI=0") - add_definitions("-DNRN_MULTISEND=0") + list(APPEND CORENRN_COMPILE_DEFS NRNMPI=0) + list(APPEND CORENRN_COMPILE_DEFS NRN_MULTISEND=0) endif() if(CORENRN_ENABLE_OPENMP) @@ -331,23 +324,23 @@ if(CORENRN_ENABLE_OPENMP) endif() endif() -add_definitions("-DLAYOUT=0") +list(APPEND CORENRN_COMPILE_DEFS LAYOUT=0) if(NOT CORENRN_ENABLE_HOC_EXP) - add_definitions("-DDISABLE_HOC_EXP") + list(APPEND CORENRN_COMPILE_DEFS DISABLE_HOC_EXP) endif() # splay tree required for net_move if(CORENRN_ENABLE_SPLAYTREE_QUEUING) - add_definitions("-DENABLE_SPLAYTREE_QUEUING") + list(APPEND CORENRN_COMPILE_DEFS ENABLE_SPLAYTREE_QUEUING) endif() if(NOT CORENRN_ENABLE_NET_RECEIVE_BUFFER) - add_definitions("-DNET_RECEIVE_BUFFERING=0") + list(APPEND CORENRN_COMPILE_DEFS NET_RECEIVE_BUFFERING=0) endif() if(NOT CORENRN_ENABLE_TIMEOUT) - add_definitions("-DDISABLE_TIMEOUT") + list(APPEND CORENRN_COMPILE_DEFS DISABLE_TIMEOUT) endif() if(CORENRN_ENABLE_REPORTING) @@ -356,7 +349,7 @@ if(CORENRN_ENABLE_REPORTING) find_program(H5DUMP_EXECUTABLE h5dump) if(reportinglib_FOUND) - add_definitions("-DENABLE_BIN_REPORTS") + list(APPEND CORENRN_COMPILE_DEFS ENABLE_BIN_REPORTS) set(ENABLE_BIN_REPORTS_TESTS ON) else() set(reportinglib_INCLUDE_DIR "") @@ -364,7 +357,7 @@ if(CORENRN_ENABLE_REPORTING) 
endif() if(sonata_FOUND) if(TARGET sonata::sonata_report) - add_definitions("-DENABLE_SONATA_REPORTS") + list(APPEND CORENRN_COMPILE_DEFS ENABLE_SONATA_REPORTS) set(ENABLE_SONATA_REPORTS_TESTS ON) else() message(SEND_ERROR "SONATA library was found but without reporting support") @@ -384,6 +377,7 @@ if(CORENRN_ENABLE_LEGACY_UNITS) else() set(CORENRN_USE_LEGACY_UNITS 0) endif() +list(APPEND CORENRN_COMPILE_DEFS CORENEURON_USE_LEGACY_UNITS=${CORENRN_USE_LEGACY_UNITS}) # Propagate Legacy Units flag to backends. set(MOD2C_ENABLE_LEGACY_UNITS ${CORENRN_ENABLE_LEGACY_UNITS} @@ -396,7 +390,7 @@ if(CORENRN_ENABLE_MPI_DYNAMIC) if(NOT CORENRN_ENABLE_MPI) message(FATAL_ERROR "Cannot enable dynamic mpi without mpi") endif() - add_compile_definitions(CORENRN_ENABLE_MPI_DYNAMIC) + list(APPEND CORENRN_COMPILE_DEFS CORENEURON_ENABLE_MPI_DYNAMIC) endif() if(CORENRN_ENABLE_PRCELLSTATE) @@ -405,7 +399,7 @@ else() set(CORENRN_NRN_PRCELLSTATE 0) endif() if(MINGW) - add_definitions("-DMINGW") + list(APPEND CORENRN_COMPILE_DEFS MINGW) endif() # ============================================================================= @@ -448,22 +442,20 @@ endif() # ============================================================================= if(CORENRN_ENABLE_CALIPER_PROFILING) find_package(caliper REQUIRED) - include_directories(${caliper_INCLUDE_DIR}) - add_definitions("-DCORENEURON_CALIPER") - set(CALIPER_LIB "caliper") - set_property(GLOBAL APPEND_STRING PROPERTY CORENEURON_LIB_LINK_FLAGS - " -L${caliper_LIB_DIR} -l${CALIPER_LIB}") + list(APPEND CORENRN_COMPILE_DEFS CORENEURON_CALIPER) + set(CORENRN_CALIPER_LIB caliper) endif() if(CORENRN_ENABLE_LIKWID_PROFILING) find_package(likwid REQUIRED) + list(APPEND CORENRN_COMPILE_DEFS LIKWID_PERFMON) + # TODO: avoid this part, probably by using some likwid CMake target include_directories(${likwid_INCLUDE_DIRS}) - add_definitions("-DLIKWID_PERFMON") endif() # enable debugging code with extra logs to stdout if(CORENRN_ENABLE_DEBUG_CODE) - 
add_definitions(-DCORENRN_DEBUG -DCHKPNTDEBUG -DCORENRN_DEBUG_QUEUE -DINTERLEAVE_DEBUG) + list(APPEND CORENRN_COMPILE_DEFS CORENRN_DEBUG CHKPNTDEBUG CORENRN_DEBUG_QUEUE INTERLEAVE_DEBUG) endif() # ============================================================================= @@ -473,38 +465,18 @@ endif() # compiler will be invoked with these flags, so we have to use flags that are as generic as # possible. if(NOT DEFINED NRN_WHEEL_BUILD OR NOT NRN_WHEEL_BUILD) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${IGNORE_UNKNOWN_PRAGMA_FLAGS}") + list(APPEND CORENRN_EXTRA_CXX_FLAGS "${IGNORE_UNKNOWN_PRAGMA_FLAGS}") endif() -# ============================================================================= -# Add main directories -# ============================================================================= +# Add the main source directory add_subdirectory(coreneuron) -if(CORENRN_ENABLE_GPU) - get_target_property(CORENRN_LINK_LIBRARIES coreneuron INTERFACE_LINK_LIBRARIES) - if(CORENRN_LINK_LIBRARIES) - foreach(LIB ${CORENRN_LINK_LIBRARIES}) - get_filename_component(dir_path ${LIB} DIRECTORY) - if(TARGET ${LIB}) - # See, for example, caliper where the coreneuron target depends on the caliper target (so we - # get LIB=caliper in this loop), but -l and -L are already added manually here: - # https://github.com/BlueBrain/CoreNeuron/blob/856cea4aa647c8f2b0d5bda6d0fc32144c5942e3/CMakeLists.txt#L411-L412 - message( - NOTICE - "Ignoring dependency '${LIB}' of 'coreneuron' and assuming relevant flags have already been added to CORENEURON_LIB_LINK_FLAGS." - ) - elseif(NOT dir_path) - # In case LIB is not a target but is just the name of a library, e.g. "dl" - set_property(GLOBAL APPEND_STRING PROPERTY CORENEURON_LIB_LINK_FLAGS " -l${LIB}") - else() - set_property(GLOBAL APPEND_STRING PROPERTY CORENEURON_LIB_LINK_FLAGS " ${LIB}") - endif() - endforeach() - endif() -endif() - +# Extract the various compiler option strings to use inside nrnivmodl-core. 
Sets the global property +# CORENRN_LIB_LINK_FLAGS, which contains the arguments that must be added to the link line for +# `special` to link against `libcorenrnmech.{a,so}` include(MakefileBuildOptions) + +# Generate the nrnivmodl-core script and makefile using the options from MakefileBuildOptions add_subdirectory(extra) if(CORENRN_ENABLE_UNIT_TESTS) @@ -514,7 +486,7 @@ endif() # ============================================================================= # Install cmake modules # ============================================================================= -get_property(CORENEURON_LIB_LINK_FLAGS GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS) +get_property(CORENRN_NEURON_LINK_FLAGS GLOBAL PROPERTY CORENRN_NEURON_LINK_FLAGS) configure_file(CMake/coreneuron-config.cmake.in CMake/coreneuron-config.cmake @ONLY) install(FILES "${CMAKE_CURRENT_BINARY_DIR}/CMake/coreneuron-config.cmake" DESTINATION share/cmake) install(EXPORT coreneuron DESTINATION share/cmake) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 0dc648628..97d12e613 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -16,29 +16,28 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) # ============================================================================= # gather various source files # ============================================================================= -file(GLOB_RECURSE CORENEURON_HEADER_FILES "*.h*") -file(GLOB_RECURSE CORENEURON_TEMPLATE_FILES "*.ipp") file( GLOB CORENEURON_CODE_FILES "apps/main1.cpp" "apps/corenrn_parameters.cpp" - "gpu/*.cpp" + "gpu/nrn_acc_manager.cpp" "io/*.cpp" "io/reports/*.cpp" "mechanism/*.cpp" + "mpi/core/nrnmpi_def_cinc.cpp" "network/*.cpp" "permute/*.cpp" "sim/*.cpp" + "sim/scopmath/abort.cpp" + "sim/scopmath/newton_thread.cpp" "utils/*.cpp" "utils/*/*.c" "utils/*/*.cpp") -set(SCOPMATH_CODE_FILES "sim/scopmath/abort.cpp" "sim/scopmath/newton_thread.cpp") set(MPI_LIB_FILES "mpi/lib/mpispike.cpp" 
"mpi/lib/nrnmpi.cpp") -set(MPI_CORE_FILES "mpi/core/nrnmpi_def_cinc.cpp") if(CORENRN_ENABLE_MPI) # Building these requires -ldl, which is only added if MPI is enabled. - list(APPEND MPI_CORE_FILES "mpi/core/nrnmpi.cpp" "mpi/core/nrnmpidec.cpp") + list(APPEND CORENEURON_CODE_FILES "mpi/core/resolve.cpp" "mpi/core/nrnmpidec.cpp") endif() file(COPY ${CORENEURON_PROJECT_SOURCE_DIR}/external/Random123/include/Random123 DESTINATION ${CMAKE_BINARY_DIR}/include) @@ -107,30 +106,31 @@ if(CORENRN_ENABLE_GPU) set_source_files_properties(${OPENACC_EXCLUDED_FILES} PROPERTIES COMPILE_FLAGS "-DDISABLE_OPENACC") - # Only compile the explicit CUDA implementation of the Hines solver in GPU builds. - list(APPEND CORENEURON_CODE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/permute/cellorder.cu) - - # Eigen-3.5+ provides better GPU support. However, some functions cannot be called directly from - # within an OpenACC region. Therefore, we need to wrap them in a special API (decorate them with - # __device__ & acc routine tokens), which allows us to eventually call them from OpenACC. Calling - # these functions from CUDA kernels presents no issue ... - if(CORENRN_ENABLE_NMODL AND EXISTS ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) - list(APPEND CORENEURON_CODE_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) + # Only compile the explicit CUDA implementation of the Hines solver in GPU builds. Because of + # https://forums.developer.nvidia.com/t/cannot-dynamically-load-a-shared-library-containing-both-openacc-and-cuda-code/210972 + # this cannot be included in the same shared library as the rest of the OpenACC code. + set(CORENEURON_CUDA_FILES ${CMAKE_CURRENT_SOURCE_DIR}/permute/cellorder.cu) + + # Eigen functions cannot be called directly from OpenACC regions, but Eigen is sort-of compatible + # with being compiled as CUDA code. 
Because of + # https://forums.developer.nvidia.com/t/cannot-dynamically-load-a-shared-library-containing-both-openacc-and-cuda-code/210972 + # this has to mean `nvc++ -cuda` rather than `nvcc`. We explicitly instantiate Eigen functions for + # different matrix sizes in partial_piv_lu.cpp (with CUDA attributes but without OpenACC or OpenMP + # annotations) and dispatch to these from a wrapper in partial_piv_lu.h that does have + # OpenACC/OpenMP annotations. + if(CORENRN_ENABLE_NMODL AND EXISTS ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cpp) + list(APPEND CORENEURON_CODE_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cpp) + if(CORENRN_ENABLE_GPU + AND CORENRN_HAVE_NVHPC_COMPILER + AND CMAKE_BUILD_TYPE STREQUAL "Debug") + # In this case OpenAccHelper.cmake passes -gpu=debug, which makes these Eigen functions + # extremely slow. Downgrade that to -gpu=lineinfo for this file. + set_source_files_properties(${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cpp + PROPERTIES COMPILE_FLAGS "-gpu=lineinfo,nodebug -O1") + endif() endif() endif() -# ============================================================================= -# eion.cpp depends on CORENRN_USE_LEGACY_UNITS -# ============================================================================= -set(LegacyFR_FILES - ${CMAKE_CURRENT_SOURCE_DIR}/mechanism/eion.cpp ${CMAKE_CURRENT_SOURCE_DIR}/apps/main1.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/io/global_vars.cpp) - -set_property( - SOURCE ${LegacyFR_FILES} - APPEND - PROPERTY COMPILE_DEFINITIONS "CORENRN_USE_LEGACY_UNITS=${CORENRN_USE_LEGACY_UNITS}") - # ============================================================================= # create libraries # ============================================================================= @@ -146,27 +146,42 @@ if(CORENRN_ENABLE_MPI AND NOT CORENRN_ENABLE_MPI_DYNAMIC) target_include_directories( ${CORENRN_MPI_LIB_NAME} PRIVATE ${MPI_INCLUDE_PATH} ${CORENEURON_PROJECT_SOURCE_DIR} 
${CORENEURON_PROJECT_BINARY_DIR}/generated) + target_link_libraries(${CORENRN_MPI_LIB_NAME} ${CORENRN_CALIPER_LIB}) set_property(TARGET ${CORENRN_MPI_LIB_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON) set(CORENRN_MPI_OBJ $) endif() -# main coreneuron library -add_library( - coreneuron - ${COMPILE_LIBRARY_TYPE} - ${CORENEURON_HEADER_FILES} - ${CORENEURON_TEMPLATE_FILES} - ${CORENEURON_CODE_FILES} - ${cudacorenrn_objs} - ${NMODL_INBUILT_MOD_OUTPUTS} - ${MPI_CORE_FILES} - ${CORENRN_MPI_OBJ}) - -target_include_directories(coreneuron PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR} - ${CORENEURON_PROJECT_BINARY_DIR}/generated) +# Library containing the bulk of the non-mechanism CoreNEURON code. This is always created and +# installed as a static library, and then the nrnivmodl-core workflow extracts the object files from +# it and does one of the following: +# +# * shared build: creates libcorenrnmech.so from these objects plus those from the translated MOD +# files +# * static build: creates a (temporary, does not get installed) libcorenrnmech.a from these objects +# plus those from the translated MOD files, then statically links that into special-core +# (nrniv-core) +# +# This scheme means that both core and mechanism .o files are linked in a single step, which is +# important for GPU linking. It does, however, mean that the core code is installed twice, once in +# libcoreneuron-core.a and once in libcorenrnmech.so (shared) or nrniv-core (static). In a GPU +# build, libcoreneuron-cuda.{a,so} is also linked to provide the CUDA implementation of the Hines +# solver. 
This cannot be included in coreneuron-core because of this issue: +# https://forums.developer.nvidia.com/t/cannot-dynamically-load-a-shared-library-containing-both-openacc-and-cuda-code/210972 +add_library(coreneuron-core STATIC ${CORENEURON_CODE_FILES} ${CORENRN_MPI_OBJ}) +if(CORENRN_ENABLE_GPU) + set(coreneuron_cuda_target coreneuron-cuda) + add_library(coreneuron-cuda ${COMPILE_LIBRARY_TYPE} ${CORENEURON_CUDA_FILES}) + target_link_libraries(coreneuron-core PUBLIC coreneuron-cuda) +endif() + +foreach(target coreneuron-core ${coreneuron_cuda_target}) + target_include_directories(${target} PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR} + ${CORENEURON_PROJECT_BINARY_DIR}/generated) +endforeach() + # we can link to MPI libraries in non-dynamic-mpi build if(CORENRN_ENABLE_MPI AND NOT CORENRN_ENABLE_MPI_DYNAMIC) - target_link_libraries(coreneuron ${MPI_CXX_LIBRARIES}) + target_link_libraries(coreneuron-core PUBLIC ${MPI_CXX_LIBRARIES}) endif() # this is where we handle dynamic mpi library build @@ -175,7 +190,7 @@ if(CORENRN_ENABLE_MPI AND CORENRN_ENABLE_MPI_DYNAMIC) # main coreneuron library needs to be linked to libdl.so and # and should be aware of shared library suffix on different platforms. # ~~~ - target_link_libraries(coreneuron ${CMAKE_DL_LIBS}) + target_link_libraries(coreneuron-core PUBLIC ${CMAKE_DL_LIBS}) # store mpi library targets that will be built list(APPEND corenrn_mpi_targets "") @@ -212,6 +227,7 @@ if(CORENRN_ENABLE_MPI AND CORENRN_ENABLE_MPI_DYNAMIC) list(GET NRN_MPI_LIBNAME_LIST ${val} libname) add_library(core${libname}_lib SHARED ${MPI_LIB_FILES}) + target_link_libraries(core${libname}_lib ${CORENRN_CALIPER_LIB}) target_include_directories( core${libname}_lib PUBLIC ${include} @@ -222,9 +238,8 @@ if(CORENRN_ENABLE_MPI AND CORENRN_ENABLE_MPI_DYNAMIC) # when we will test coreneuron on windows. 
# ~~~ if(MINGW) # type msmpi only - add_dependencies(core${libname}_lib coreneuron) - target_link_libraries(core${libname}_lib ${MPI_C_LIBRARIES}) - target_link_libraries(core${libname}_lib coreneuron) + add_dependencies(core${libname}_lib coreneuron-core) + target_link_libraries(core${libname}_lib ${MPI_C_LIBRARIES} coreneuron-core) endif() set_property(TARGET core${libname}_lib PROPERTY OUTPUT_NAME core${libname}) list(APPEND corenrn_mpi_targets "core${libname}_lib") @@ -239,48 +254,39 @@ if(CORENRN_ENABLE_MPI AND CORENRN_ENABLE_MPI_DYNAMIC) install(TARGETS ${corenrn_mpi_targets} DESTINATION lib) endif() -# Prevent CMake from running a device code link step when assembling libcoreneuron.a in GPU builds. -# The device code linking needs to be deferred to the final step, where it is done by `nvc++ -cuda`. -set_target_properties(coreneuron PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -# Suppress some compiler warnings. Note in GPU builds this library includes CUDA files. -target_compile_options(coreneuron - PRIVATE $<$:${CORENEURON_CXX_WARNING_SUPPRESSIONS}>) -add_dependencies(coreneuron nrnivmodl-core) - -# scopmath is created separately for nrnivmodl-core workflow -add_library(scopmath STATIC ${CORENEURON_HEADER_FILES} ${SCOPMATH_CODE_FILES}) -target_include_directories(scopmath PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR} - ${CORENEURON_PROJECT_BINARY_DIR}/generated) +# Suppress some compiler warnings. 
+target_compile_options(coreneuron-core PRIVATE ${CORENEURON_CXX_WARNING_SUPPRESSIONS}) +target_link_libraries(coreneuron-core PUBLIC ${reportinglib_LIBRARY} ${sonatareport_LIBRARY} + ${CORENRN_CALIPER_LIB} ${likwid_LIBRARIES}) -target_link_libraries(coreneuron ${reportinglib_LIBRARY} ${sonatareport_LIBRARY} ${CALIPER_LIB} - ${likwid_LIBRARIES}) - -target_include_directories(coreneuron SYSTEM - PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}/external/Random123/include) -target_include_directories(coreneuron SYSTEM - PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}/external/CLI11/include) +# TODO: fix adding a dependency of coreneuron-core on CLI11::CLI11 when CLI11 is a submodule. Right +# now this doesn't work because the CLI11 targets are not exported/installed but coreneuron-core is. +get_target_property(CLI11_HEADER_DIRECTORY CLI11::CLI11 INTERFACE_INCLUDE_DIRECTORIES) +target_include_directories( + coreneuron-core SYSTEM PRIVATE ${CLI11_HEADER_DIRECTORY} + ${CORENEURON_PROJECT_SOURCE_DIR}/external/Random123/include) # See: https://en.cppreference.com/w/cpp/filesystem#Notes if(CMAKE_CXX_COMPILER_IS_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.1) - target_link_libraries(coreneuron stdc++fs) + target_link_libraries(coreneuron-core PUBLIC stdc++fs) endif() if(CORENRN_ENABLE_GPU) - # nrnran123.cpp possibly-temporarily uses Boost.Pool in GPU builds if it's available. + # nrnran123.cpp uses Boost.Pool in GPU builds if it's available. 
find_package(Boost QUIET) if(Boost_FOUND) message(STATUS "Boost found, enabling use of memory pools for Random123...") - target_include_directories(coreneuron SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) - target_compile_definitions(coreneuron PRIVATE CORENEURON_USE_BOOST_POOL) + target_include_directories(coreneuron-core SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) + target_compile_definitions(coreneuron-core PRIVATE CORENEURON_USE_BOOST_POOL) endif() endif() set_target_properties( - coreneuron scopmath + coreneuron-core ${coreneuron_cuda_target} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib - POSITION_INDEPENDENT_CODE ON) -cpp_cc_configure_sanitizers(TARGET coreneuron scopmath ${corenrn_mpi_targets}) + POSITION_INDEPENDENT_CODE ${CORENRN_ENABLE_SHARED}) +cpp_cc_configure_sanitizers(TARGET coreneuron-core ${coreneuron_cuda_target} ${corenrn_mpi_targets}) # ============================================================================= # create special-core with halfgap.mod for tests @@ -288,44 +294,47 @@ cpp_cc_configure_sanitizers(TARGET coreneuron scopmath ${corenrn_mpi_targets}) set(modfile_directory "${CORENEURON_PROJECT_SOURCE_DIR}/tests/integration/ring_gap/mod files") file(GLOB modfiles "${modfile_directory}/*.mod") -if(CORENRN_ENABLE_SHARED) - set(corenrn_mech_library - "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}/libcorenrnmech${CMAKE_SHARED_LIBRARY_SUFFIX}" - CACHE INTERNAL "coreneuron mechanism library") -else() - set(corenrn_mech_library - "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}/libcorenrnmech${CMAKE_STATIC_LIBRARY_SUFFIX}" - CACHE INTERNAL "coreneuron mechanism library") -endif() - -set(output_binaries "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}/special-core" - "${corenrn_mech_library}") +# We have to link things like unit tests against this because some "core" .cpp files refer to +# symbols in the translated versions of default .mod files +set(nrniv_core_prefix 
"${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}") +set(corenrn_mech_library + "${nrniv_core_prefix}/${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_PREFIX}corenrnmech${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_SUFFIX}" +) +set(output_binaries "${nrniv_core_prefix}/special-core" "${corenrn_mech_library}") add_custom_command( OUTPUT ${output_binaries} - DEPENDS scopmath coreneuron ${NMODL_TARGET_TO_DEPEND} ${modfiles} ${CORENEURON_BUILTIN_MODFILES} + DEPENDS coreneuron-core ${NMODL_TARGET_TO_DEPEND} ${modfiles} ${CORENEURON_BUILTIN_MODFILES} COMMAND ${CMAKE_BINARY_DIR}/bin/nrnivmodl-core -b ${COMPILE_LIBRARY_TYPE} -m - ${CORENRN_MOD2CPP_BINARY} -p 1 "${modfile_directory}" + ${CORENRN_MOD2CPP_BINARY} -p 4 "${modfile_directory}" WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/bin COMMENT "Running nrnivmodl-core with halfgap.mod") add_custom_target(nrniv-core ALL DEPENDS ${output_binaries}) if(CORENRN_ENABLE_GPU) separate_arguments(CORENRN_ACC_FLAGS UNIX_COMMAND "${NVHPC_ACC_COMP_FLAGS}") - target_compile_options(coreneuron BEFORE PRIVATE $<$:${CORENRN_ACC_FLAGS}>) - target_compile_options(scopmath BEFORE PRIVATE $<$:${CORENRN_ACC_FLAGS}>) + target_compile_options(coreneuron-core PRIVATE ${CORENRN_ACC_FLAGS}) endif() -# ============================================================================= -# Extract link definitions to be used with nrnivmodl-core -# ============================================================================= -get_target_property(CORENRN_LINK_LIBS coreneuron LINK_LIBRARIES) -if(NOT CORENRN_LINK_LIBS) - set(CORENRN_LINK_LIBS "") +# Create an extra target for use by NEURON when CoreNEURON is being built as a submodule. NEURON +# tests will depend on this, so it must in turn depend on everything that is needed to run nrnivmodl +# -coreneuron. +add_custom_target(coreneuron-for-tests) +add_dependencies(coreneuron-for-tests coreneuron-core ${NMODL_TARGET_TO_DEPEND}) +# Create an extra target for internal use that unit tests and so on can depend on. 
+# ${corenrn_mech_library} is libcorenrnmech.{a,so}, which contains both the compiled default +# mechanisms and the content of libcoreneuron-core.a. +add_library(coreneuron-all INTERFACE) +target_link_libraries(coreneuron-all INTERFACE "${corenrn_mech_library}") +# Also copy the dependencies of libcoreneuron-core as interface dependencies of this new target +# (example: ${corenrn_mech_library} will probably depend on MPI, so when the unit tests link against +# ${corenrn_mech_library} they need to know to link against MPI too). +get_target_property(coreneuron_core_deps coreneuron-core LINK_LIBRARIES) +if(coreneuron_core_deps) + foreach(dep ${coreneuron_core_deps}) + target_link_libraries(coreneuron-all INTERFACE ${dep}) + endforeach() endif() -set(CORENRN_LINK_LIBS - "${CORENRN_LINK_LIBS}" - PARENT_SCOPE) # Make headers avail to build tree configure_file(engine.h.in ${CMAKE_BINARY_DIR}/include/coreneuron/engine.h @ONLY) @@ -353,19 +362,13 @@ file(COPY apps/coreneuron.cpp DESTINATION ${CMAKE_BINARY_DIR}/share/coreneuron) # coreneuron main libraries install( - TARGETS coreneuron + TARGETS coreneuron-core ${coreneuron_cuda_target} EXPORT coreneuron LIBRARY DESTINATION lib ARCHIVE DESTINATION lib INCLUDES DESTINATION $) -# scopemath into share for nrnivmodl-core -install( - TARGETS scopmath - EXPORT coreneuron - DESTINATION lib) - # headers and some standalone code files for nrnivmodl-core install( DIRECTORY ${CMAKE_BINARY_DIR}/include/coreneuron @@ -390,8 +393,11 @@ install( RENAME nrniv-core) install(FILES apps/coreneuron.cpp DESTINATION share/coreneuron) -# install mechanism library -install(FILES ${corenrn_mech_library} DESTINATION lib) +# install mechanism library in shared library builds, if we're linking statically then there is no +# need +if(CORENRN_ENABLE_SHARED) + install(FILES ${corenrn_mech_library} DESTINATION lib) +endif() # install random123 and nmodl headers install(DIRECTORY ${CMAKE_BINARY_DIR}/include/ DESTINATION include) diff --git 
a/coreneuron/apps/corenrn_parameters.cpp b/coreneuron/apps/corenrn_parameters.cpp
index 40c322b18..6ee920d1f 100644
--- a/coreneuron/apps/corenrn_parameters.cpp
+++ b/coreneuron/apps/corenrn_parameters.cpp
@@ -5,15 +5,17 @@
 # See top-level LICENSE file for details.
 # =============================================================================.
 */
-
 #include "coreneuron/apps/corenrn_parameters.hpp"
+#include <memory>
 
 namespace coreneuron {
 
 extern std::string cnrn_version();
 
-corenrn_parameters::corenrn_parameters() {
+corenrn_parameters::corenrn_parameters()
+    : m_app{std::make_unique<CLI::App>("CoreNeuron - Optimised Simulator Engine for NEURON.")} {
+    auto& app = *m_app;
     app.set_config("--read-config", "", "Read parameters from ini file", false)
         ->check(CLI::ExistingFile);
     app.add_option("--write-config",
@@ -167,14 +169,21 @@ corenrn_parameters::corenrn_parameters() {
     CLI::retire_option(app, "--show");
 }
 
+// Implementation in .cpp file where CLI types are complete.
+corenrn_parameters::~corenrn_parameters() = default;
+
+std::string corenrn_parameters::config_to_str(bool default_also, bool write_description) const {
+    return m_app->config_to_str(default_also, write_description);
+}
+
 void corenrn_parameters::reset() {
     static_cast<corenrn_parameters_data&>(*this) = corenrn_parameters_data{};
-    app.clear();
+    m_app->clear();
 }
 
 void corenrn_parameters::parse(int argc, char** argv) {
     try {
-        app.parse(argc, argv);
+        m_app->parse(argc, argv);
         if (verbose == verbose_level::NONE) {
             nrn_nobanner_ = 1;
         }
@@ -182,11 +191,11 @@ void corenrn_parameters::parse(int argc, char** argv) {
         // in case of parsing errors, show message with exception
         std::cerr << "CLI parsing error, see nrniv-core --help for more information. 
\n" << std::endl; - app.exit(e); + m_app->exit(e); throw e; } catch (const CLI::ParseError& e) { // use --help is also ParseError; in this case exit by showing all options - app.exit(e); + m_app->exit(e); exit(0); } diff --git a/coreneuron/apps/corenrn_parameters.hpp b/coreneuron/apps/corenrn_parameters.hpp index bfe646622..8db8ce06c 100644 --- a/coreneuron/apps/corenrn_parameters.hpp +++ b/coreneuron/apps/corenrn_parameters.hpp @@ -1,18 +1,14 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ - #pragma once - +#include +#include #include -#include -#include -#include -#include /** * \class corenrn_parameters @@ -32,6 +28,10 @@ * Also single dash long options are not supported anymore (-mpi -> --mpi). */ +namespace CLI { +struct App; +} + namespace coreneuron { struct corenrn_parameters_data { @@ -94,10 +94,8 @@ struct corenrn_parameters_data { }; struct corenrn_parameters: corenrn_parameters_data { - CLI::App app{"CoreNeuron - Optimised Simulator Engine for NEURON."}; /// CLI app that performs - /// CLI parsing - - corenrn_parameters(); /// Constructor that initializes the CLI11 app. + corenrn_parameters(); /// Constructor that initializes the CLI11 app. + ~corenrn_parameters(); /// Destructor defined in .cpp where CLI11 types are complete. void parse(int argc, char* argv[]); /// Runs the CLI11_PARSE macro. @@ -111,6 +109,22 @@ struct corenrn_parameters: corenrn_parameters_data { inline bool is_quiet() { return verbose == verbose_level::NONE; } + + /** @brief Return a string summarising the current parameter values. + * + * This forwards to the CLI11 method of the same name. Returns a string that + * could be read in as a config of the current values of the App. 
+     *
+     * @param default_also Include any defaulted arguments.
+     * @param write_description Include option descriptions and the App description.
+     */
+    std::string config_to_str(bool default_also = false, bool write_description = false) const;
+
+  private:
+    // CLI app that performs CLI parsing. std::unique_ptr avoids having to
+    // include CLI11 headers from CoreNEURON headers, and therefore avoids
+    // CoreNEURON having to install CLI11 when using it from a submodule.
+    std::unique_ptr<CLI::App> m_app;
 };
 
 std::ostream& operator<<(std::ostream& os,
diff --git a/coreneuron/apps/main1.cpp b/coreneuron/apps/main1.cpp
index fb74df7d0..a429b04f7 100644
--- a/coreneuron/apps/main1.cpp
+++ b/coreneuron/apps/main1.cpp
@@ -1,6 +1,6 @@
 /*
 # =============================================================================
-# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
+# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
 #
 # See top-level LICENSE file for details.
 # =============================================================================.
@@ -51,16 +51,13 @@ const char* corenrn_version() {
     return coreneuron::bbcore_write_version;
 }
 
-// the CORENRN_USE_LEGACY_UNITS determined by CORENRN_ENABLE_LEGACY_UNITS
+// the CORENEURON_USE_LEGACY_UNITS determined by CORENRN_ENABLE_LEGACY_UNITS
 bool corenrn_units_use_legacy() {
-    return CORENRN_USE_LEGACY_UNITS;
+    return CORENEURON_USE_LEGACY_UNITS;
 }
 
 void (*nrn2core_part2_clean_)();
 
-// cf. utils/ispc_globals.c
-extern double ispc_celsius;
-
 /**
  * If "export OMP_NUM_THREADS=n" is not set then omp by default sets
  * the number of threads equal to the number of cores on this node.
@@ -244,9 +241,6 @@ void nrn_init_and_load_data(int argc, corenrn_param.celsius = celsius; - // for ispc backend - ispc_celsius = celsius; - // create net_cvode instance mk_netcvode(); @@ -456,7 +450,7 @@ std::unique_ptr create_report_handler(ReportConfiguration& config using namespace coreneuron; -#if NRNMPI && defined CORENRN_ENABLE_MPI_DYNAMIC +#if NRNMPI && defined(CORENEURON_ENABLE_MPI_DYNAMIC) static void* load_dynamic_mpi(const std::string& libname) { dlerror(); void* handle = dlopen(libname.c_str(), RTLD_NOW | RTLD_GLOBAL); @@ -478,7 +472,7 @@ extern "C" void mk_mech_init(int argc, char** argv) { #if NRNMPI if (corenrn_param.mpi_enable) { -#ifdef CORENRN_ENABLE_MPI_DYNAMIC +#ifdef CORENEURON_ENABLE_MPI_DYNAMIC // coreneuron rely on neuron to detect mpi library distribution and // the name of the library itself. Make sure the library name is specified // via CLI option. @@ -506,12 +500,16 @@ extern "C" void mk_mech_init(int argc, char** argv) { #ifdef CORENEURON_ENABLE_GPU if (corenrn_param.gpu) { init_gpu(); + cnrn_target_copyin(&celsius); + cnrn_target_copyin(&pi); + cnrn_target_copyin(&secondorder); + nrnran123_initialise_global_state_on_device(); } #endif if (!corenrn_param.writeParametersFilepath.empty()) { std::ofstream out(corenrn_param.writeParametersFilepath, std::ios::trunc); - out << corenrn_param.app.config_to_str(false, false); + out << corenrn_param.config_to_str(false, false); out.close(); } @@ -685,6 +683,10 @@ extern "C" int run_solve_core(int argc, char** argv) { if (nrn_have_gaps) { nrn_partrans::delete_gap_indices_from_device(); } + nrnran123_destroy_global_state_on_device(); + cnrn_target_delete(&secondorder); + cnrn_target_delete(&pi); + cnrn_target_delete(&celsius); } // Cleaning the memory diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index 3eff82fe1..1fcc59478 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -10,6 +10,7 @@ #include #include 
"coreneuron/apps/corenrn_parameters.hpp" +#include "coreneuron/gpu/nrn_acc_manager.hpp" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/network/netcon.hpp" #include "coreneuron/nrniv/nrniv_decl.h" @@ -31,15 +32,198 @@ #include #endif +#if __has_include() +#define USE_CXXABI +#include +#include +#include +#endif + +#ifdef CORENEURON_ENABLE_PRESENT_TABLE +#include +#include +#include +#include +#include +namespace { +struct present_table_value { + std::size_t ref_count{}, size{}; + std::byte* dev_ptr{}; +}; +std::map present_table; +std::shared_mutex present_table_mutex; +} // namespace +#endif + +namespace { +/** @brief Try to demangle a type name, return the mangled name on failure. + */ +std::string cxx_demangle(const char* mangled) { +#ifdef USE_CXXABI + int status{}; + // Note that the third argument to abi::__cxa_demangle returns the length of + // the allocated buffer, which may be larger than strlen(demangled) + 1. + std::unique_ptr demangled{ + abi::__cxa_demangle(mangled, nullptr, nullptr, &status), free}; + return status ? 
mangled : demangled.get(); +#else + return mangled; +#endif +} +bool cnrn_target_debug_output_enabled() { + const char* env = std::getenv("CORENEURON_GPU_DEBUG"); + if (!env) { + return false; + } + std::string env_s{env}; + if (env_s == "1") { + return true; + } else if (env_s == "0") { + return false; + } else { + throw std::runtime_error("CORENEURON_GPU_DEBUG must be set to 0 or 1 (got " + env_s + ")"); + } +} +bool cnrn_target_enable_debug{cnrn_target_debug_output_enabled()}; +} // namespace + namespace coreneuron { extern InterleaveInfo* interleave_info; -void copy_ivoc_vect_to_device(const IvocVect& iv, IvocVect& div); -void delete_ivoc_vect_from_device(IvocVect&); void nrn_ion_global_map_copyto_device(); void nrn_ion_global_map_delete_from_device(); void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay); void nrn_VecPlay_delete_from_device(NrnThread* nt); +void cnrn_target_copyin_debug(std::string_view file, + int line, + std::size_t sizeof_T, + std::type_info const& typeid_T, + void const* h_ptr, + std::size_t len, + void* d_ptr) { + if (!cnrn_target_enable_debug) { + return; + } + std::cerr << file << ':' << line << ": cnrn_target_copyin<" << cxx_demangle(typeid_T.name()) + << ">(" << h_ptr << ", " << len << " * " << sizeof_T << " = " << len * sizeof_T + << ") -> " << d_ptr << std::endl; +} +void cnrn_target_delete_debug(std::string_view file, + int line, + std::size_t sizeof_T, + std::type_info const& typeid_T, + void const* h_ptr, + std::size_t len) { + if (!cnrn_target_enable_debug) { + return; + } + std::cerr << file << ':' << line << ": cnrn_target_delete<" << cxx_demangle(typeid_T.name()) + << ">(" << h_ptr << ", " << len << " * " << sizeof_T << " = " << len * sizeof_T << ')' + << std::endl; +} +void cnrn_target_deviceptr_debug(std::string_view file, + int line, + std::type_info const& typeid_T, + void const* h_ptr, + void* d_ptr) { + if (!cnrn_target_enable_debug) { + return; + } + std::cerr << file << ':' << line << ": 
cnrn_target_deviceptr<" << cxx_demangle(typeid_T.name()) + << ">(" << h_ptr << ") -> " << d_ptr << std::endl; +} +void cnrn_target_is_present_debug(std::string_view file, + int line, + std::type_info const& typeid_T, + void const* h_ptr, + void* d_ptr) { + if (!cnrn_target_enable_debug) { + return; + } + std::cerr << file << ':' << line << ": cnrn_target_is_present<" << cxx_demangle(typeid_T.name()) + << ">(" << h_ptr << ") -> " << d_ptr << std::endl; +} +void cnrn_target_memcpy_to_device_debug(std::string_view file, + int line, + std::size_t sizeof_T, + std::type_info const& typeid_T, + void const* h_ptr, + std::size_t len, + void* d_ptr) { + if (!cnrn_target_enable_debug) { + return; + } + std::cerr << file << ':' << line << ": cnrn_target_memcpy_to_device<" + << cxx_demangle(typeid_T.name()) << ">(" << d_ptr << ", " << h_ptr << ", " << len + << " * " << sizeof_T << " = " << len * sizeof_T << ')' << std::endl; +} + +#ifdef CORENEURON_ENABLE_PRESENT_TABLE +std::pair cnrn_target_deviceptr_impl(bool must_be_present_or_null, void const* h_ptr) { + if (!h_ptr) { + return {nullptr, false}; + } + // Concurrent calls to this method are safe, but they must be serialised + // w.r.t. calls to the cnrn_target_*_update_present_table methods. 
+ std::shared_lock _{present_table_mutex}; + if (present_table.empty()) { + return {nullptr, must_be_present_or_null}; + } + // prev(first iterator greater than h_ptr or last if not found) gives the first iterator less + // than or equal to h_ptr + auto const iter = std::prev(std::upper_bound( + present_table.begin(), present_table.end(), h_ptr, [](void const* hp, auto const& entry) { + return hp < entry.first; + })); + if (iter == present_table.end()) { + return {nullptr, must_be_present_or_null}; + } + std::byte const* const h_byte_ptr{static_cast(h_ptr)}; + std::byte const* const h_start_of_block{iter->first}; + std::size_t const block_size{iter->second.size}; + std::byte* const d_start_of_block{iter->second.dev_ptr}; + bool const is_present{h_byte_ptr < h_start_of_block + block_size}; + if (!is_present) { + return {nullptr, must_be_present_or_null}; + } + return {d_start_of_block + (h_byte_ptr - h_start_of_block), false}; +} + +void cnrn_target_copyin_update_present_table(void const* h_ptr, void* d_ptr, std::size_t len) { + if (!h_ptr) { + assert(!d_ptr); + return; + } + std::lock_guard _{present_table_mutex}; + // TODO include more pedantic overlap checking? 
+ present_table_value new_val{}; + new_val.size = len; + new_val.ref_count = 1; + new_val.dev_ptr = static_cast(d_ptr); + auto const [iter, inserted] = present_table.emplace(static_cast(h_ptr), + std::move(new_val)); + if (!inserted) { + // Insertion didn't occur because h_ptr was already in the present table + assert(iter->second.size == len); + assert(iter->second.dev_ptr == new_val.dev_ptr); + ++(iter->second.ref_count); + } +} +void cnrn_target_delete_update_present_table(void const* h_ptr, std::size_t len) { + if (!h_ptr) { + return; + } + std::lock_guard _{present_table_mutex}; + auto const iter = present_table.find(static_cast(h_ptr)); + assert(iter != present_table.end()); + assert(iter->second.size == len); + --(iter->second.ref_count); + if (iter->second.ref_count == 0) { + present_table.erase(iter); + } +} +#endif + int cnrn_target_get_num_devices() { #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) @@ -76,7 +260,7 @@ void cnrn_target_set_default_device(int device_num) { } #ifdef CORENEURON_ENABLE_GPU - +#ifndef CORENEURON_UNIFIED_MEMORY static Memb_list* copy_ml_to_device(const Memb_list* ml, int type) { // As we never run code for artificial cell inside GPU we don't copy it. 
int is_art = corenrn.get_is_artificial()[type]; @@ -86,6 +270,14 @@ static Memb_list* copy_ml_to_device(const Memb_list* ml, int type) { auto d_ml = cnrn_target_copyin(ml); + if (ml->global_variables) { + assert(ml->global_variables_size); + void* d_inst = cnrn_target_copyin(static_cast(ml->global_variables), + ml->global_variables_size); + cnrn_target_memcpy_to_device(&(d_ml->global_variables), &d_inst); + } + + int n = ml->nodecount; int szp = corenrn.get_prop_param_size()[type]; int szdp = corenrn.get_prop_dparam_size()[type]; @@ -168,6 +360,7 @@ static Memb_list* copy_ml_to_device(const Memb_list* ml, int type) { return d_ml; } +#endif static void update_ml_on_host(const Memb_list* ml, int type) { int is_art = corenrn.get_is_artificial()[type]; @@ -257,6 +450,13 @@ static void delete_ml_from_device(Memb_list* ml, int type) { cnrn_target_delete(ml->pdata, pcnt); } cnrn_target_delete(ml->nodeindices, n); + + if (ml->global_variables) { + assert(ml->global_variables_size); + cnrn_target_delete(static_cast(ml->global_variables), + ml->global_variables_size); + } + cnrn_target_delete(ml); } @@ -603,9 +803,8 @@ void delete_ivoc_vect_from_device(IvocVect& vec) { if (n) { cnrn_target_delete(vec.data(), n); } - cnrn_target_delete(&vec); #else - (void) vec; + static_cast(vec); #endif } @@ -1108,7 +1307,7 @@ void nrn_newtonspace_delete_from_device(NewtonSpace* ns) { } void nrn_sparseobj_copyto_device(SparseObj* so) { -#ifdef CORENEURON_ENABLE_GPU +#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_UNIFIED_MEMORY) // FIXME this check needs to be tweaked if we ever want to run with a mix // of CPU and GPU threads. 
if (nrn_threads[0].compute_gpu == 0) { @@ -1191,7 +1390,7 @@ void nrn_sparseobj_copyto_device(SparseObj* so) { } void nrn_sparseobj_delete_from_device(SparseObj* so) { -#ifdef CORENEURON_ENABLE_GPU +#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_UNIFIED_MEMORY) // FIXME this check needs to be tweaked if we ever want to run with a mix // of CPU and GPU threads. if (nrn_threads[0].compute_gpu == 0) { @@ -1312,7 +1511,7 @@ void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay) { void nrn_VecPlay_delete_from_device(NrnThread* nt) { for (int i = 0; i < nt->n_vecplay; i++) { - auto* vecplay_instance = reinterpret_cast(nt->_vecplay[i]); + auto* vecplay_instance = static_cast(nt->_vecplay[i]); cnrn_target_delete(vecplay_instance->e_); if (vecplay_instance->discon_indices_) { delete_ivoc_vect_from_device(*(vecplay_instance->discon_indices_)); diff --git a/coreneuron/gpu/nrn_acc_manager.hpp b/coreneuron/gpu/nrn_acc_manager.hpp index 72d222cdd..5a2a6f544 100644 --- a/coreneuron/gpu/nrn_acc_manager.hpp +++ b/coreneuron/gpu/nrn_acc_manager.hpp @@ -1,17 +1,16 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ - -#ifndef _nrn_device_manager_ -#define _nrn_device_manager_ - -#include "coreneuron/sim/multicore.hpp" +#pragma once namespace coreneuron { +struct Memb_list; +struct NrnThread; +struct NetSendBuffer_t; void setup_nrnthreads_on_device(NrnThread* threads, int nthreads); void delete_nrnthreads_on_device(NrnThread* threads, int nthreads); void update_nrnthreads_on_host(NrnThread* threads, int nthreads); @@ -24,6 +23,4 @@ void update_net_send_buffer_on_host(NrnThread* nt, NetSendBuffer_t* nsb); void update_weights_from_gpu(NrnThread* threads, int nthreads); void init_gpu(); - } // namespace coreneuron -#endif // _nrn_device_manager_ diff --git a/coreneuron/io/core2nrn_data_return.cpp b/coreneuron/io/core2nrn_data_return.cpp index 6a12c197f..87a549ac6 100644 --- a/coreneuron/io/core2nrn_data_return.cpp +++ b/coreneuron/io/core2nrn_data_return.cpp @@ -137,7 +137,7 @@ static void core2nrn_corepointer(int tid, NrnThreadMembList* tml) { d = ml->data + nrn_i_layout(jp, ml->nodecount, 0, dsz, layout); pd = ml->pdata + nrn_i_layout(jp, ml->nodecount, 0, pdsz, layout); (*corenrn.get_bbcore_write()[type])( - nullptr, nullptr, &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, 0.0); + nullptr, nullptr, &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, ml, 0.0); } std::unique_ptr iArray; @@ -159,8 +159,18 @@ static void core2nrn_corepointer(int tid, NrnThreadMembList* tml) { d = ml->data + nrn_i_layout(jp, ml->nodecount, 0, dsz, layout); pd = ml->pdata + nrn_i_layout(jp, ml->nodecount, 0, pdsz, layout); - (*corenrn.get_bbcore_write()[type])( - dArray.get(), iArray.get(), &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, 0.0); + (*corenrn.get_bbcore_write()[type])(dArray.get(), + iArray.get(), + &dcnt, + &icnt, + 0, + aln_cntml, + d, + pd, + ml->_thread, + &nt, + ml, + 0.0); } (*core2nrn_corepointer_mech_)(tid, type, icnt, dcnt, iArray.get(), dArray.get()); diff --git 
a/coreneuron/io/global_vars.cpp b/coreneuron/io/global_vars.cpp index 128a1cdb9..815423ea9 100644 --- a/coreneuron/io/global_vars.cpp +++ b/coreneuron/io/global_vars.cpp @@ -142,7 +142,7 @@ void set_globals(const char* path, bool cli_global_seed, int cli_global_seed_val } else if (strcmp(name, "Random123_globalindex") == 0) { nrnran123_set_globalindex((uint32_t) n); } else if (strcmp(name, "_nrnunit_use_legacy_") == 0) { - if (n != CORENRN_USE_LEGACY_UNITS) { + if (n != CORENEURON_USE_LEGACY_UNITS) { hoc_execerror( "CORENRN_ENABLE_LEGACY_UNITS not" " consistent with NEURON value of" diff --git a/coreneuron/io/nrn2core_data_init.cpp b/coreneuron/io/nrn2core_data_init.cpp index e732dec11..ad7106f6e 100644 --- a/coreneuron/io/nrn2core_data_init.cpp +++ b/coreneuron/io/nrn2core_data_init.cpp @@ -407,6 +407,7 @@ extern void** pattern_stim_info_ref(int icnt, Datum* _ppvar, ThreadDatum* _thread, NrnThread* _nt, + Memb_list* ml, double v); extern "C" { @@ -437,7 +438,7 @@ void nrn2core_patstim_share_info() { assert(0); } - void** info = pattern_stim_info_ref(_iml, _cntml, _p, _ppvar, nullptr, nt, 0.0); + void** info = pattern_stim_info_ref(_iml, _cntml, _p, _ppvar, nullptr, nt, ml, 0.0); (*nrn2core_patternstim_)(info); } } diff --git a/coreneuron/io/nrn_checkpoint.cpp b/coreneuron/io/nrn_checkpoint.cpp index 955848901..ecf432422 100644 --- a/coreneuron/io/nrn_checkpoint.cpp +++ b/coreneuron/io/nrn_checkpoint.cpp @@ -449,7 +449,7 @@ void CheckPoints::write_phase2(NrnThread& nt) const { d = ml->data + nrn_i_layout(jp, ml->nodecount, 0, dsz, layout); pd = ml->pdata + nrn_i_layout(jp, ml->nodecount, 0, pdsz, layout); (*corenrn.get_bbcore_write()[type])( - nullptr, nullptr, &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, 0.0); + nullptr, nullptr, &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, ml, 0.0); } fh << icnt << "\n"; fh << dcnt << "\n"; @@ -478,7 +478,7 @@ void CheckPoints::write_phase2(NrnThread& nt) const { pd = ml->pdata + nrn_i_layout(jp, ml->nodecount, 0, 
pdsz, layout); (*corenrn.get_bbcore_write()[type])( - dArray, iArray, &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, 0.0); + dArray, iArray, &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, ml, 0.0); } if (icnt) { @@ -592,6 +592,7 @@ bool CheckPoints::initialize() { ml->pdata, ml->_thread, nrn_threads, + ml, 0.0); break; } @@ -802,6 +803,7 @@ void CheckPoints::write_tqueue(NrnThread& nt, FileHandler& fh) const { ml->pdata, ml->_thread, nrn_threads, + ml, 0.0); break; } diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp index c22ffc0ce..703e853d8 100644 --- a/coreneuron/io/nrn_setup.cpp +++ b/coreneuron/io/nrn_setup.cpp @@ -754,6 +754,14 @@ void nrn_cleanup() { ml->_thread = nullptr; } + // Destroy the global variables struct allocated in nrn_init + if (auto* const priv_dtor = corenrn.get_memb_func(tml->index).private_destructor) { + (*priv_dtor)(nt, ml, tml->index); + assert(!ml->instance); + assert(!ml->global_variables); + assert(ml->global_variables_size == 0); + } + NetReceiveBuffer_t* nrb = ml->_net_receive_buffer; if (nrb) { if (nrb->_size) { diff --git a/coreneuron/io/phase2.cpp b/coreneuron/io/phase2.cpp index a97b335bb..ad5748ad7 100644 --- a/coreneuron/io/phase2.cpp +++ b/coreneuron/io/phase2.cpp @@ -337,7 +337,7 @@ void Phase2::read_direct(int thread_id, const NrnThread& nt) { offset += nrn_soa_padded_size(nodecounts[i], layout) * param_sizes[type]; if (nodeindices_) { std::copy(nodeindices_, nodeindices_ + nodecounts[i], tml.nodeindices.data()); - free_memory(nodeindices_); + free(nodeindices_); // not free_memory because this is allocated by NEURON? 
} if (corenrn.get_is_artificial()[type]) { assert(nodeindices_ == nullptr); @@ -867,6 +867,7 @@ void Phase2::get_info_from_bbcore(NrnThread& nt, pd, ml->_thread, &nt, + ml, 0.0); } assert(dk == static_cast(tmls[i].dArray.size())); @@ -958,7 +959,8 @@ void Phase2::populate(NrnThread& nt, const UserParams& userParams) { NrnThreadMembList* tml_last = nullptr; for (int i = 0; i < n_mech; ++i) { - auto tml = create_tml(i, memb_func[mech_types[i]], shadow_rhs_cnt, mech_types, nodecounts); + auto tml = + create_tml(nt, i, memb_func[mech_types[i]], shadow_rhs_cnt, mech_types, nodecounts); nt._ml_list[tml->index] = tml->ml; diff --git a/coreneuron/mechanism/capac.cpp b/coreneuron/mechanism/capac.cpp index 42c65cb18..f47a4ebd7 100644 --- a/coreneuron/mechanism/capac.cpp +++ b/coreneuron/mechanism/capac.cpp @@ -32,10 +32,12 @@ void capacitance_reg(void) { /* all methods deal with capacitance in special ways */ register_mech(mechanism, nrn_alloc_capacitance, - (mod_f_t) 0, - (mod_f_t) 0, - (mod_f_t) 0, - (mod_f_t) nrn_init_capacitance, + nullptr, + nullptr, + nullptr, + nrn_init_capacitance, + nullptr, + nullptr, -1, 1); int mechtype = nrn_get_mechtype(mechanism[1]); diff --git a/coreneuron/mechanism/eion.cpp b/coreneuron/mechanism/eion.cpp index 4bc077880..ec1fd665e 100644 --- a/coreneuron/mechanism/eion.cpp +++ b/coreneuron/mechanism/eion.cpp @@ -1,6 +1,6 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
@@ -94,9 +94,11 @@ void ion_reg(const char* name, double valence) { register_mech((const char**) mechanism, nrn_alloc_ion, nrn_cur_ion, - (mod_f_t) 0, - (mod_f_t) 0, - (mod_f_t) nrn_init_ion, + nullptr, + nullptr, + nrn_init_ion, + nullptr, + nullptr, -1, 1); mechtype = nrn_get_mechtype(mechanism[1]); @@ -154,70 +156,6 @@ the USEION statement of any model using this ion\n", } } -#ifndef CORENRN_USE_LEGACY_UNITS -#define CORENRN_USE_LEGACY_UNITS 0 -#endif - -#if CORENRN_USE_LEGACY_UNITS == 1 -#define FARADAY 96485.309 -#define gasconstant 8.3134 -#else -#include "coreneuron/nrnoc/nrnunits_modern.h" -#define FARADAY _faraday_codata2018 -#define gasconstant _gasconstant_codata2018 -#endif - -#define ktf (1000. * gasconstant * (celsius + 273.15) / FARADAY) - -double nrn_nernst(double ci, double co, double z, double celsius) { - /*printf("nrn_nernst %g %g %g\n", ci, co, z);*/ - if (z == 0) { - return 0.; - } - if (ci <= 0.) { - return 1e6; - } else if (co <= 0.) { - return -1e6; - } else { - return ktf / z * log(co / ci); - } -} - -nrn_pragma_omp(declare target) -void nrn_wrote_conc(int type, - double* p1, - int p2, - int it, - double** gimap, - double celsius, - int _cntml_padded) { - if (it & 040) { - int _iml = 0; - /* passing _nt to this function causes cray compiler to segfault during compilation - * hence passing _cntml_padded - */ - double* pe = p1 - p2 * _STRIDE; - pe[0] = nrn_nernst(pe[1 * _STRIDE], pe[2 * _STRIDE], gimap[type][2], celsius); - } -} - -static double efun(double x) { - if (fabs(x) < 1e-4) { - return 1. 
- x / 2.; - } else { - return x / (exp(x) - 1); - } -} - -nrn_pragma_omp(end declare target) - -double nrn_ghk(double v, double ci, double co, double z) { - double temp = z * v / ktf; - double eco = co * efun(temp); - double eci = ci * efun(-temp); - return (.001) * z * FARADAY * (eci - eco); -} - #if VECTORIZE #define erev pd[0 * _STRIDE] /* From Eion */ #define conci pd[1 * _STRIDE] @@ -257,7 +195,7 @@ ion_style("name_ion", [c_style, e_style, einit, eadvance, cinit]) double nrn_nernst_coef(int type) { /* for computing jacobian element dconc'/dconc */ - return ktf / charge; + return ktf(celsius) / charge; } /* Must be called prior to any channels which update the currents */ diff --git a/coreneuron/mechanism/mech/mod2c_core_thread.hpp b/coreneuron/mechanism/mech/mod2c_core_thread.hpp index 85ed348f6..d18160f3a 100644 --- a/coreneuron/mechanism/mech/mod2c_core_thread.hpp +++ b/coreneuron/mechanism/mech/mod2c_core_thread.hpp @@ -16,14 +16,14 @@ namespace coreneuron { #define _STRIDE _cntml_padded + _iml -#define _threadargscomma_ _iml, _cntml_padded, _p, _ppvar, _thread, _nt, _v, +#define _threadargscomma_ _iml, _cntml_padded, _p, _ppvar, _thread, _nt, _ml, _v, #define _threadargsprotocomma_ \ int _iml, int _cntml_padded, double *_p, Datum *_ppvar, ThreadDatum *_thread, NrnThread *_nt, \ - double _v, -#define _threadargs_ _iml, _cntml_padded, _p, _ppvar, _thread, _nt, _v + Memb_list *_ml, double _v, +#define _threadargs_ _iml, _cntml_padded, _p, _ppvar, _thread, _nt, _ml, _v #define _threadargsproto_ \ int _iml, int _cntml_padded, double *_p, Datum *_ppvar, ThreadDatum *_thread, NrnThread *_nt, \ - double _v + Memb_list *_ml, double _v struct Elm { unsigned row; /* Row location */ diff --git a/coreneuron/mechanism/mechanism.hpp b/coreneuron/mechanism/mechanism.hpp index ab78ad502..9427423df 100644 --- a/coreneuron/mechanism/mechanism.hpp +++ b/coreneuron/mechanism/mechanism.hpp @@ -143,6 +143,12 @@ struct Memb_list { NetSendBuffer_t* _net_send_buffer = nullptr; int 
nodecount; /* actual node count */ int _nodecount_padded; - void* instance = nullptr; /* mechanism instance */ + void* instance{nullptr}; /* mechanism instance struct */ + // nrn_acc_manager.cpp handles data movement to/from the accelerator as the + // "private constructor" in the translated MOD file code is called before + // the main nrn_acc_manager methods that copy thread/mechanism data to the + // device + void* global_variables{nullptr}; + std::size_t global_variables_size{}; }; } // namespace coreneuron diff --git a/coreneuron/mechanism/membfunc.hpp b/coreneuron/mechanism/membfunc.hpp index 2556f0f87..ac650595c 100644 --- a/coreneuron/mechanism/membfunc.hpp +++ b/coreneuron/mechanism/membfunc.hpp @@ -1,17 +1,19 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ - #pragma once -#include - #include "coreneuron/mechanism/mechanism.hpp" #include "coreneuron/utils/offload.hpp" +#include "coreneuron/utils/units.hpp" + +#include +#include + namespace coreneuron { using Pfrpdat = Datum* (*) (void); @@ -21,6 +23,8 @@ struct NrnThread; using mod_alloc_t = void (*)(double*, Datum*, int); using mod_f_t = void (*)(NrnThread*, Memb_list*, int); using pnt_receive_t = void (*)(Point_process*, int, double); +using thread_table_check_t = + void (*)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, Memb_list*, int); /* * Memb_func structure contains all related informations of a mechanism @@ -33,12 +37,17 @@ struct Memb_func { mod_f_t initialize; mod_f_t constructor; mod_f_t destructor; /* only for point processes */ + // These are used for CoreNEURON-internal allocation/cleanup; they are kept + // separate from the CONSTRUCTOR/DESTRUCTOR functions just above (one of + // which is apparently only for point processes) for simplicity. + mod_f_t private_constructor; + mod_f_t private_destructor; Symbol* sym; int vectorized; int thread_size_; /* how many Datum needed in Memb_list if vectorized */ void (*thread_mem_init_)(ThreadDatum*); /* after Memb_list._thread is allocated */ void (*thread_cleanup_)(ThreadDatum*); /* before Memb_list._thread is freed */ - void (*thread_table_check_)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, int); + thread_table_check_t thread_table_check_; int is_point; void (*setdata_)(double*, Datum*); int* dparam_semantics; /* for nrncore writing. 
*/ @@ -87,6 +96,8 @@ extern int register_mech(const char** m, mod_f_t jacob, mod_f_t stat, mod_f_t initialize, + mod_f_t private_constructor, + mod_f_t private_destructor, int nrnpointerindex, int vectorized); extern int point_register_mech(const char**, @@ -95,6 +106,8 @@ extern int point_register_mech(const char**, mod_f_t jacob, mod_f_t stat, mod_f_t initialize, + mod_f_t private_constructor, + mod_f_t private_destructor, int nrnpointerindex, mod_f_t constructor, mod_f_t destructor, @@ -110,14 +123,53 @@ extern void hoc_register_watch_check(nrn_watch_check_t, int); extern void nrn_jacob_capacitance(NrnThread*, Memb_list*, int); extern void nrn_writes_conc(int, int); -nrn_pragma_omp(declare target) -nrn_pragma_acc(routine seq) -extern void nrn_wrote_conc(int, double*, int, int, double**, double, int); -nrn_pragma_acc(routine seq) -double nrn_nernst(double ci, double co, double z, double celsius); -nrn_pragma_acc(routine seq) -extern double nrn_ghk(double v, double ci, double co, double z); -nrn_pragma_omp(end declare target) +constexpr double ktf(double celsius) { + return 1000. * units::gasconstant * (celsius + 273.15) / units::faraday; +} +// std::log isn't constexpr, but there are argument values for which nrn_nernst +// is a constant expression +constexpr double nrn_nernst(double ci, double co, double z, double celsius) { + if (z == 0) { + return 0.; + } + if (ci <= 0.) { + return 1e6; + } else if (co <= 0.) 
{ + return -1e6; + } else { + return ktf(celsius) / z * std::log(co / ci); + } +} +constexpr void nrn_wrote_conc(int type, + double* p1, + int p2, + int it, + double** gimap, + double celsius, + int _cntml_padded) { + if (it & 040) { + constexpr int _iml = 0; + int const STRIDE{_cntml_padded + _iml}; + /* passing _nt to this function causes cray compiler to segfault during compilation + * hence passing _cntml_padded + */ + double* pe = p1 - p2 * STRIDE; + pe[0] = nrn_nernst(pe[1 * STRIDE], pe[2 * STRIDE], gimap[type][2], celsius); + } +} +inline double nrn_ghk(double v, double ci, double co, double z, double celsius) { + auto const efun = [](double x) { + if (std::abs(x) < 1e-4) { + return 1. - x / 2.; + } else { + return x / (std::exp(x) - 1.); + } + }; + double const temp{z * v / ktf(celsius)}; + double const eco{co * efun(+temp)}; + double const eci{ci * efun(-temp)}; + return .001 * z * units::faraday * (eci - eco); +} extern void hoc_register_prop_size(int, int, int); extern void hoc_register_dparam_semantics(int type, int, const char* name); extern void hoc_reg_ba(int, mod_f_t, int); @@ -151,6 +203,7 @@ using bbcore_read_t = void (*)(double*, Datum*, ThreadDatum*, NrnThread*, + Memb_list*, double); using bbcore_write_t = void (*)(double*, @@ -163,6 +216,7 @@ using bbcore_write_t = void (*)(double*, Datum*, ThreadDatum*, NrnThread*, + Memb_list*, double); extern int nrn_mech_depend(int type, int* dependencies); @@ -179,10 +233,6 @@ extern void artcell_net_move(void**, Point_process*, double); extern void nrn2ncs_outputevent(int netcon_output_index, double firetime); extern bool nrn_use_localgid_; extern void net_sem_from_gpu(int sendtype, int i_vdata, int, int ith, int ipnt, double, double); -nrn_pragma_acc(routine seq) -nrn_pragma_omp(declare target) -extern int at_time(NrnThread*, double); -nrn_pragma_omp(end declare target) // _OPENACC and/or NET_RECEIVE_BUFFERING extern void net_sem_from_gpu(int, int, int, int, int, double, double); diff --git 
a/coreneuron/mechanism/nrnoc_ml.ispc b/coreneuron/mechanism/nrnoc_ml.ispc index 6b196eaf3..2c28a745f 100644 --- a/coreneuron/mechanism/nrnoc_ml.ispc +++ b/coreneuron/mechanism/nrnoc_ml.ispc @@ -57,6 +57,8 @@ struct Memb_list { uniform int nodecount; uniform int _nodecount_padded; void* uniform instance; + void* uniform global_variables; + uniform size_t global_variables_size; }; struct Point_process { @@ -153,8 +155,6 @@ struct NrnThread { void* mapping; }; -extern uniform double ispc_celsius; - // NOTE : this implementation is duplicated from "coreneuron/network/cvodestb.cpp" // If changes are required, make sure to change CPP as well. static inline int at_time(uniform NrnThread* nt, varying double te) { diff --git a/coreneuron/mechanism/patternstim.cpp b/coreneuron/mechanism/patternstim.cpp index e22b19e98..e680a6187 100644 --- a/coreneuron/mechanism/patternstim.cpp +++ b/coreneuron/mechanism/patternstim.cpp @@ -38,6 +38,7 @@ extern void pattern_stim_setup_helper(int size, Datum* _ppvar, ThreadDatum* _thread, NrnThread* _nt, + Memb_list* ml, double v); static size_t read_raster_file(const char* fname, double** tvec, int** gidvec, double tstop); @@ -93,7 +94,7 @@ void nrn_mkPatternStim(const char* fname, double tstop) { } else { assert(0); } - pattern_stim_setup_helper(size, tvec, gidvec, _iml, _cntml, _p, _ppvar, nullptr, nt, 0.0); + pattern_stim_setup_helper(size, tvec, gidvec, _iml, _cntml, _p, _ppvar, nullptr, nt, ml, 0.0); } size_t read_raster_file(const char* fname, double** tvec, int** gidvec, double tstop) { @@ -136,8 +137,8 @@ size_t read_raster_file(const char* fname, double** tvec, int** gidvec, double t } // see nrn_setup.cpp:read_phase2 for how it creates NrnThreadMembList instances. 
-static NrnThreadMembList* alloc_nrn_thread_memb(int type) { - NrnThreadMembList* tml = (NrnThreadMembList*) emalloc(sizeof(NrnThreadMembList)); +static NrnThreadMembList* alloc_nrn_thread_memb(NrnThread* nt, int type) { + NrnThreadMembList* tml = (NrnThreadMembList*) ecalloc(1, sizeof(NrnThreadMembList)); tml->dependencies = nullptr; tml->ndependencies = 0; tml->index = type; @@ -148,7 +149,7 @@ static NrnThreadMembList* alloc_nrn_thread_memb(int type) { int psize = corenrn.get_prop_param_size()[type]; int dsize = corenrn.get_prop_dparam_size()[type]; int layout = corenrn.get_mech_data_layout()[type]; - tml->ml = (Memb_list*) emalloc(sizeof(Memb_list)); + tml->ml = (Memb_list*) ecalloc(1, sizeof(Memb_list)); tml->ml->nodecount = 1; tml->ml->_nodecount_padded = tml->ml->nodecount; tml->ml->nodeindices = nullptr; @@ -160,6 +161,10 @@ static NrnThreadMembList* alloc_nrn_thread_memb(int type) { tml->ml->_net_send_buffer = nullptr; tml->ml->_permute = nullptr; + if (auto* const priv_ctor = corenrn.get_memb_func(tml->index).private_constructor) { + priv_ctor(nt, tml->ml, tml->index); + } + return tml; } @@ -177,7 +182,7 @@ Point_process* nrn_artcell_instantiate(const char* mechname) { // printf("nrn_artcell_instantiate %s type=%d\n", mechname, type); // create and append to nt.tml - auto tml = alloc_nrn_thread_memb(type); + auto tml = alloc_nrn_thread_memb(nt, type); assert(nt->_ml_list[type] == nullptr); // FIXME nt->_ml_list[type] = tml->ml; diff --git a/coreneuron/mechanism/register_mech.cpp b/coreneuron/mechanism/register_mech.cpp index 01b82814c..498754d80 100644 --- a/coreneuron/mechanism/register_mech.cpp +++ b/coreneuron/mechanism/register_mech.cpp @@ -19,9 +19,7 @@ namespace coreneuron { int secondorder = 0; -nrn_pragma_omp(declare target) double t, dt, celsius, pi; -nrn_pragma_omp(end declare target) int rev_dt; using Pfrv = void (*)(); @@ -117,6 +115,8 @@ int register_mech(const char** m, mod_f_t jacob, mod_f_t stat, mod_f_t initialize, + mod_f_t 
private_constructor, + mod_f_t private_destructor, int /* nrnpointerindex */, int vectorized) { auto& memb_func = corenrn.get_memb_funcs(); @@ -144,6 +144,8 @@ int register_mech(const char** m, memb_func[type].initialize = initialize; memb_func[type].constructor = nullptr; memb_func[type].destructor = nullptr; + memb_func[type].private_constructor = private_constructor; + memb_func[type].private_destructor = private_destructor; #if VECTORIZE memb_func[type].vectorized = vectorized ? 1 : 0; memb_func[type].thread_size_ = vectorized ? (vectorized - 1) : 0; @@ -340,12 +342,23 @@ int point_register_mech(const char** m, mod_f_t jacob, mod_f_t stat, mod_f_t initialize, + mod_f_t private_constructor, + mod_f_t private_destructor, int nrnpointerindex, mod_f_t constructor, mod_f_t destructor, int vectorized) { const Symbol* s = m[1]; - register_mech(m, alloc, cur, jacob, stat, initialize, nrnpointerindex, vectorized); + register_mech(m, + alloc, + cur, + jacob, + stat, + initialize, + private_constructor, + private_destructor, + nrnpointerindex, + vectorized); register_constructor(constructor); register_destructor(destructor); return point_reg_helper(s); @@ -417,8 +430,7 @@ void _nrn_thread_reg1(int i, void (*f)(ThreadDatum*)) { corenrn.get_memb_func(i).thread_mem_init_ = f; } -void _nrn_thread_table_reg(int i, - void (*f)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, int)) { +void _nrn_thread_table_reg(int i, thread_table_check_t f) { if (i == -1) return; diff --git a/coreneuron/mechanism/register_mech.hpp b/coreneuron/mechanism/register_mech.hpp index df80d958a..07fa1ca5c 100644 --- a/coreneuron/mechanism/register_mech.hpp +++ b/coreneuron/mechanism/register_mech.hpp @@ -18,7 +18,7 @@ extern void hoc_reg_bbcore_read(int type, bbcore_read_t f); extern void hoc_reg_bbcore_write(int type, bbcore_write_t f); extern void _nrn_thread_table_reg( int i, - void (*f)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, int)); + void (*f)(int, int, double*, Datum*, 
ThreadDatum*, NrnThread*, Memb_list*, int)); extern void alloc_mech(int); } // namespace coreneuron diff --git a/coreneuron/mpi/core/nrnmpi.cpp b/coreneuron/mpi/core/resolve.cpp similarity index 100% rename from coreneuron/mpi/core/nrnmpi.cpp rename to coreneuron/mpi/core/resolve.cpp diff --git a/coreneuron/mpi/nrnmpi.h b/coreneuron/mpi/nrnmpi.h index 04df699ff..03a1d2461 100644 --- a/coreneuron/mpi/nrnmpi.h +++ b/coreneuron/mpi/nrnmpi.h @@ -81,7 +81,7 @@ struct mpi_function>: mpi_function_ba using mpi_function_base::mpi_function_base; template // in principle deducible from `function_ptr` auto operator()(Args&&... args) const { -#ifdef CORENRN_ENABLE_MPI_DYNAMIC +#ifdef CORENEURON_ENABLE_MPI_DYNAMIC // Dynamic MPI, m_fptr should have been initialised via dlsym. assert(m_fptr); return (*reinterpret_cast(m_fptr))(std::forward(args)...); diff --git a/coreneuron/network/cvodestb.cpp b/coreneuron/network/cvodestb.cpp index 31c18807e..bd3de5f4c 100644 --- a/coreneuron/network/cvodestb.cpp +++ b/coreneuron/network/cvodestb.cpp @@ -84,16 +84,4 @@ void fixed_play_continuous(NrnThread* nt) { } } -// NOTE : this implementation is duplicated in "coreneuron/mechanism/nrnoc_ml.ispc" -// for the ISPC backend. If changes are required, make sure to change ISPC as well. -nrn_pragma_omp(declare target) -int at_time(NrnThread* nt, double te) { - double x = te - 1e-11; - if (x <= nt->_t && x > (nt->_t - nt->_dt)) { - return 1; - } - return 0; -} -nrn_pragma_omp(end declare target) - } // namespace coreneuron diff --git a/coreneuron/network/partrans.cpp b/coreneuron/network/partrans.cpp index ddfb49421..28fee5d86 100644 --- a/coreneuron/network/partrans.cpp +++ b/coreneuron/network/partrans.cpp @@ -133,12 +133,9 @@ void nrnthread_v_transfer(NrnThread* _nt) { void nrn_partrans::copy_gap_indices_to_device() { // Ensure index vectors, src_gather, and insrc_buf_ are on the gpu. 
if (insrcdspl_) { - int n_insrc_buf = insrcdspl_[nrnmpi_numprocs]; - static_cast(n_insrc_buf); - nrn_pragma_acc(enter data create(insrc_buf_[:n_insrc_buf])) - // clang-format off - nrn_pragma_omp(target enter data map(alloc: insrc_buf_[:n_insrc_buf])) - // clang-format off + // TODO: we don't actually need to copy here, just allocate + associate + // storage on the device + cnrn_target_copyin(insrc_buf_, insrcdspl_[nrnmpi_numprocs]); } for (int tid = 0; tid < nrn_nthread; ++tid) { const NrnThread* nt = nrn_threads + tid; @@ -150,13 +147,9 @@ void nrn_partrans::copy_gap_indices_to_device() { if (!ttd.src_indices.empty()) { cnrn_target_copyin(ttd.src_indices.data(), ttd.src_indices.size()); - - size_t n_src_gather = ttd.src_gather.size(); - const double* src_gather = ttd.src_gather.data(); - static_cast(n_src_gather); - static_cast(src_gather); - nrn_pragma_acc(enter data create(src_gather[:n_src_gather])) - nrn_pragma_omp(target enter data map(alloc: src_gather[:n_src_gather])) + // TODO: we don't actually need to copy here, just allocate + + // associate storage on the device. 
+ cnrn_target_copyin(ttd.src_gather.data(), ttd.src_gather.size()); } if (ttd.insrc_indices.size()) { diff --git a/coreneuron/nrnconf.h b/coreneuron/nrnconf.h index b25a2764a..7e4cb6d4e 100644 --- a/coreneuron/nrnconf.h +++ b/coreneuron/nrnconf.h @@ -32,17 +32,9 @@ using Symbol = char; #define VEC_AREA(i) (_nt->_actual_area[(i)]) #define VECTORIZE 1 -// extern variables require acc declare -nrn_pragma_omp(declare target) extern double celsius; -nrn_pragma_acc(declare create(celsius)) - extern double pi; -nrn_pragma_acc(declare create(pi)) - extern int secondorder; -nrn_pragma_acc(declare create(secondorder)) -nrn_pragma_omp(end declare target) extern double t, dt; extern int rev_dt; diff --git a/coreneuron/nrnoc/nrnunits_modern.h b/coreneuron/nrnoc/nrnunits_modern.h deleted file mode 100644 index d93638841..000000000 --- a/coreneuron/nrnoc/nrnunits_modern.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -# ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL -# -# See top-level LICENSE file for details. -# ============================================================================= -*/ - -#pragma once - -/** - NMODL translated MOD files get unit constants typically from - share/lib/nrnunits.lib.in. But there were other source files that - hardcode some of the constants. Here we gather a few modern units into - a single place (but, unfortunately, also in nrnunits.lib.in). Legacy units - cannot be gathered here because they can differ slightly from place to place. - - These come from https://physics.nist.gov/cuu/Constants/index.html. - Termed the "2018 CODATA recommended values", they became available - on 20 May 2019 and replace the 2014 CODATA set. 
- - See oc/hoc_init.c, nrnoc/eion.c, nrniv/kschan.h -**/ - - -#define _electron_charge_codata2018 1.602176634e-19 /* coulomb exact*/ -#define _avogadro_number_codata2018 6.02214076e+23 /* exact */ -#define _boltzmann_codata2018 1.380649e-23 /* joule/K exact */ -#define _faraday_codata2018 \ - (_electron_charge_codata2018 * _avogadro_number_codata2018) /* 96485.33212... coulomb/mol */ -#define _gasconstant_codata2018 \ - (_boltzmann_codata2018 * _avogadro_number_codata2018) /* 8.314462618... joule/mol-K */ - -/* e/k in K/millivolt */ -#define _e_over_k_codata2018 \ - (.001 * _electron_charge_codata2018 / _boltzmann_codata2018) /* 11.604518... K/mV */ diff --git a/coreneuron/sim/fast_imem.cpp b/coreneuron/sim/fast_imem.cpp index 1218b7967..d3b463a48 100644 --- a/coreneuron/sim/fast_imem.cpp +++ b/coreneuron/sim/fast_imem.cpp @@ -21,9 +21,9 @@ bool nrn_use_fast_imem; void fast_imem_free() { for (auto nt = nrn_threads; nt < nrn_threads + nrn_nthread; ++nt) { if (nt->nrn_fast_imem) { - free(nt->nrn_fast_imem->nrn_sav_rhs); - free(nt->nrn_fast_imem->nrn_sav_d); - free(nt->nrn_fast_imem); + free_memory(nt->nrn_fast_imem->nrn_sav_rhs); + free_memory(nt->nrn_fast_imem->nrn_sav_d); + free_memory(nt->nrn_fast_imem); nt->nrn_fast_imem = nullptr; } } @@ -34,7 +34,7 @@ void nrn_fast_imem_alloc() { fast_imem_free(); for (auto nt = nrn_threads; nt < nrn_threads + nrn_nthread; ++nt) { int n = nt->end; - nt->nrn_fast_imem = (NrnFastImem*) ecalloc(1, sizeof(NrnFastImem)); + nt->nrn_fast_imem = (NrnFastImem*) ecalloc_align(1, sizeof(NrnFastImem)); nt->nrn_fast_imem->nrn_sav_rhs = (double*) ecalloc_align(n, sizeof(double)); nt->nrn_fast_imem->nrn_sav_d = (double*) ecalloc_align(n, sizeof(double)); } diff --git a/coreneuron/sim/multicore.cpp b/coreneuron/sim/multicore.cpp index d5368a29c..b8dd293d2 100644 --- a/coreneuron/sim/multicore.cpp +++ b/coreneuron/sim/multicore.cpp @@ -61,7 +61,8 @@ static int table_check_cnt_; static ThreadDatum* table_check_; -NrnThreadMembList* 
create_tml(int mech_id, +NrnThreadMembList* create_tml(NrnThread& nt, + int mech_id, Memb_func& memb_func, int& shadow_rhs_cnt, const std::vector& mech_types, @@ -91,6 +92,10 @@ NrnThreadMembList* create_tml(int mech_id, } } + if (auto* const priv_ctor = corenrn.get_memb_func(tml->index).private_constructor) { + priv_ctor(&nt, tml->ml, tml->index); + } + return tml; } @@ -166,7 +171,7 @@ void nrn_thread_table_check() { auto tml = static_cast(table_check_[i + 1]._pvoid); Memb_list* ml = tml->ml; (*corenrn.get_memb_func(tml->index).thread_table_check_)( - 0, ml->_nodecount_padded, ml->data, ml->pdata, ml->_thread, &nt, tml->index); + 0, ml->_nodecount_padded, ml->data, ml->pdata, ml->_thread, &nt, ml, tml->index); } } } // namespace coreneuron diff --git a/coreneuron/sim/multicore.hpp b/coreneuron/sim/multicore.hpp index c9b3cb58e..a6ac50be0 100644 --- a/coreneuron/sim/multicore.hpp +++ b/coreneuron/sim/multicore.hpp @@ -36,7 +36,8 @@ struct NrnThreadMembList { /* patterned after CvMembList in cvodeobj.h */ int* dependencies; /* list of mechanism types that this mechanism depends on*/ int ndependencies; /* for scheduling we need to know the dependency count */ }; -NrnThreadMembList* create_tml(int mech_id, +NrnThreadMembList* create_tml(NrnThread& nt, + int mech_id, Memb_func& memb_func, int& shadow_rhs_cnt, const std::vector& mech_types, @@ -163,6 +164,7 @@ void nrn_multithread_job(F&& job, Args&&... args) { #pragma omp parallel for private(i) shared(nrn_threads, job, nrn_nthread, \ nrnmpi_myid) schedule(static, 1) + // FIXME: multiple forwarding of the same arguments... for (i = 0; i < nrn_nthread; ++i) { job(nrn_threads + i, std::forward(args)...); } @@ -192,6 +194,13 @@ extern void direct_mode_initialize(); extern void nrn_mk_table_check(void); extern void nonvint(NrnThread* _nt); extern void update(NrnThread*); - - +// NOTE : this implementation is duplicated in "coreneuron/mechanism/nrnoc_ml.ispc" +// for the ISPC backend. 
If changes are required, make sure to change ISPC as well. +constexpr int at_time(NrnThread* nt, double te) { + double x = te - 1e-11; + if (x <= nt->_t && x > (nt->_t - nt->_dt)) { + return 1; + } + return 0; +} } // namespace coreneuron diff --git a/coreneuron/sim/scopmath/errcodes.h b/coreneuron/sim/scopmath/errcodes.h index 5f32c5785..94d08f73c 100644 --- a/coreneuron/sim/scopmath/errcodes.h +++ b/coreneuron/sim/scopmath/errcodes.h @@ -1,16 +1,33 @@ -/****************************************************************************** - * - * File: errcodes.h - * - * Copyright (c) 1984, 1985, 1986, 1987, 1988, 1989, 1990 - * Duke University - * - * errcodes.h,v 1.1.1.1 1994/10/12 17:22:18 hines Exp - * - ******************************************************************************/ +/* +# ============================================================================= +# Originally errcodes.h from SCoP library, Copyright (c) 1984-90 Duke University +# ============================================================================= +# Subsequent extensive prototype and memory layout changes for CoreNEURON +# +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL +# +# See top-level LICENSE file for details. +# =============================================================================. +*/ +#pragma once namespace coreneuron { extern int abort_run(int); -} +namespace scopmath { +/** @brief Flag to disable some code sections at compile time. + * + * Some methods, such as coreneuron::scopmath::sparse::getelm(...), decide at + * runtime whether they are simply accessors, or if they dynamically modify the + * matrix in question, possibly allocating new memory. Typically the second + * mode will be used during model initialisation, while the first will be used + * during computation/simulation. Compiling the more complicated code for the + * second mode can be problematic for targets such as GPU, where dynamic + * allocation and global state are complex. 
This enum is intended to be used as + * a template parameter to flag (at compile time) when this code can be + * omitted. + */ +enum struct enabled_code { all, compute_only }; +} // namespace scopmath +} // namespace coreneuron #define ROUNDOFF 1.e-20 #define ZERO 1.e-8 #define STEP 1.e-6 diff --git a/coreneuron/sim/scopmath/sparse_thread.hpp b/coreneuron/sim/scopmath/sparse_thread.hpp index 6614a0a1b..8d84cbb0e 100644 --- a/coreneuron/sim/scopmath/sparse_thread.hpp +++ b/coreneuron/sim/scopmath/sparse_thread.hpp @@ -67,7 +67,8 @@ inline void increase_order(SparseObj* so, unsigned row) { * biggest difference is that elements are no longer removed and this saves much * time allocating and freeing during the solve phase. */ -inline Elm* getelm(SparseObj* so, unsigned row, unsigned col, Elm* new_elem) { +template +Elm* getelm(SparseObj* so, unsigned row, unsigned col, Elm* new_elem) { Elm *el, *elnext; unsigned vrow = so->varord[row]; @@ -90,10 +91,14 @@ inline Elm* getelm(SparseObj* so, unsigned row, unsigned col, Elm* new_elem) { } /* insert below el */ if (!new_elem) { - new_elem = new Elm{}; - // Using array-new here causes problems in GPU compilation. - new_elem->value = static_cast(std::malloc(so->_cntml_padded * sizeof(double))); - increase_order(so, row); + if constexpr (code_to_enable == enabled_code::compute_only) { + // Dynamic allocation should not happen during the compute phase. 
+ assert(false); + } else { + new_elem = new Elm{}; + new_elem->value = new double[so->_cntml_padded]; + increase_order(so, row); + } } new_elem->r_down = el->r_down; el->r_down = new_elem; @@ -133,9 +138,13 @@ inline Elm* getelm(SparseObj* so, unsigned row, unsigned col, Elm* new_elem) { } /* insert above el */ if (!new_elem) { - new_elem = new Elm{}; - new_elem->value = static_cast(std::malloc(so->_cntml_padded * sizeof(double))); - increase_order(so, row); + if constexpr (code_to_enable == enabled_code::compute_only) { + assert(false); + } else { + new_elem = new Elm{}; + new_elem->value = new double[so->_cntml_padded]; + increase_order(so, row); + } } new_elem->r_up = el->r_up; el->r_up = new_elem; @@ -491,16 +500,13 @@ void create_coef_list(SparseObj* so, int n, SPFUN fun, _threadargsproto_) { fun(so, so->rhs, _threadargs_); // std::invoke in C++17 so->phase = 0; } -} // namespace sparse -} // namespace scopmath -// Methods that may be called from translated MOD files are kept outside the -// scopmath::sparse namespace. -inline double* _nrn_thread_getelm(SparseObj* so, int row, int col, int _iml) { +template +double* thread_getelm(SparseObj* so, int row, int col, int _iml) { if (!so->phase) { return so->coef_list[so->ngetcall[_iml]++]; } - Elm* el = scopmath::sparse::getelm(so, (unsigned) row, (unsigned) col, nullptr); + Elm* el = scopmath::sparse::getelm(so, (unsigned) row, (unsigned) col, nullptr); if (so->phase == 1) { so->ngetcall[_iml]++; } else { @@ -508,7 +514,11 @@ inline double* _nrn_thread_getelm(SparseObj* so, int row, int col, int _iml) { } return el->value; } +} // namespace sparse +} // namespace scopmath +// Methods that may be called from translated MOD files are kept outside the +// scopmath::sparse namespace. 
#define scopmath_sparse_s(arg) _p[scopmath_sparse_ix(s[arg])] #define scopmath_sparse_d(arg) _p[scopmath_sparse_ix(d[arg])] diff --git a/coreneuron/utils/ispc/globals.cpp b/coreneuron/utils/ispc/globals.cpp deleted file mode 100644 index 0344bf1b8..000000000 --- a/coreneuron/utils/ispc/globals.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/* -# ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL -# -# See top-level LICENSE file for details. -# ============================================================================= -*/ - -/* - * Coreneuron global variables are declared at least in the coreneuron namespace. In ispc it is, - * however, not possible to access variables within C++ namespaces. To be able to access these - * variables from ispc kernels, we declare them in global namespace and a C linkage file. - */ - -extern "C" { -double ispc_celsius; -} diff --git a/coreneuron/utils/memory.cpp b/coreneuron/utils/memory.cpp index 70d928b63..8f45487dc 100644 --- a/coreneuron/utils/memory.cpp +++ b/coreneuron/utils/memory.cpp @@ -15,7 +15,7 @@ #include namespace coreneuron { -bool unified_memory_enabled() { +bool gpu_enabled() { #ifdef CORENEURON_ENABLE_GPU return corenrn_param.gpu; #else diff --git a/coreneuron/utils/memory.h b/coreneuron/utils/memory.h index 9a2e65645..9e612680c 100644 --- a/coreneuron/utils/memory.h +++ b/coreneuron/utils/memory.h @@ -1,6 +1,6 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. @@ -22,13 +22,13 @@ #endif namespace coreneuron { -/** @brief Check if allocate_unified will return a unified memory address. +/** + * @brief Check if GPU support is enabled. 
* - * If false, [de]allocate_unified simply forward to new/delete. It is - * convenient to include this method here to avoid having to access - * corenrn_param directly. + * This returns true if GPU support was enabled at compile time and at runtime + * via coreneuron.gpu = True and/or --gpu, otherwise it returns false. */ -bool unified_memory_enabled(); +bool gpu_enabled(); /** @brief Allocate unified memory in GPU builds iff GPU enabled, otherwise new */ diff --git a/coreneuron/utils/nrnoc_aux.hpp b/coreneuron/utils/nrnoc_aux.hpp index 3c2f23326..10b5880ea 100644 --- a/coreneuron/utils/nrnoc_aux.hpp +++ b/coreneuron/utils/nrnoc_aux.hpp @@ -34,9 +34,4 @@ extern void hoc_execerror(const char*, const char*); /* print and abort */ extern void hoc_warning(const char*, const char*); extern double hoc_Exp(double x); - -// defined in eion.cpp and this file included in translated mod files. -extern double nrn_nernst(double ci, double co, double z, double celsius); -extern double nrn_ghk(double v, double ci, double co, double z); - } // namespace coreneuron diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index 078990107..6297221e8 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -1,6 +1,6 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= @@ -19,48 +19,121 @@ #else #define nrn_pragma_acc(x) #define nrn_pragma_omp(x) -#include #endif #include +#include +#include namespace coreneuron { +void cnrn_target_copyin_debug(std::string_view file, + int line, + std::size_t sizeof_T, + std::type_info const& typeid_T, + void const* h_ptr, + std::size_t len, + void* d_ptr); +void cnrn_target_delete_debug(std::string_view file, + int line, + std::size_t sizeof_T, + std::type_info const& typeid_T, + void const* h_ptr, + std::size_t len); +void cnrn_target_deviceptr_debug(std::string_view file, + int line, + std::type_info const& typeid_T, + void const* h_ptr, + void* d_ptr); +void cnrn_target_is_present_debug(std::string_view file, + int line, + std::type_info const& typeid_T, + void const* h_ptr, + void* d_ptr); +void cnrn_target_memcpy_to_device_debug(std::string_view file, + int line, + std::size_t sizeof_T, + std::type_info const& typeid_T, + void const* h_ptr, + std::size_t len, + void* d_ptr); +#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_UNIFIED_MEMORY) && \ + defined(__NVCOMPILER_MAJOR__) && defined(__NVCOMPILER_MINOR__) && \ + (__NVCOMPILER_MAJOR__ <= 22) && (__NVCOMPILER_MINOR__ <= 3) +// Homegrown implementation for buggy NVHPC versions (<=22.3), see +// https://forums.developer.nvidia.com/t/acc-deviceptr-does-not-work-in-openacc-code-dynamically-loaded-from-a-shared-library/211599 +#define CORENEURON_ENABLE_PRESENT_TABLE +std::pair cnrn_target_deviceptr_impl(bool must_be_present_or_null, void const* h_ptr); +void cnrn_target_copyin_update_present_table(void const* h_ptr, void* d_ptr, std::size_t len); +void cnrn_target_delete_update_present_table(void const* h_ptr, std::size_t len); +#endif + template -T* cnrn_target_deviceptr(const T* h_ptr) { -#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ +T* cnrn_target_deviceptr_or_present(std::string_view file, + int line, + bool 
must_be_present_or_null, + const T* h_ptr) { + T* d_ptr{}; + bool error{false}; +#ifdef CORENEURON_ENABLE_PRESENT_TABLE + auto const d_ptr_and_error = cnrn_target_deviceptr_impl(must_be_present_or_null, h_ptr); + d_ptr = static_cast(d_ptr_and_error.first); + error = d_ptr_and_error.second; +#elif defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) - return static_cast(acc_deviceptr(const_cast(h_ptr))); + d_ptr = static_cast(acc_deviceptr(const_cast(h_ptr))); #elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) - T const* d_ptr{}; - - nrn_pragma_omp(target data use_device_ptr(h_ptr)) - { d_ptr = h_ptr; } - - return const_cast(d_ptr); + if (must_be_present_or_null || omp_target_is_present(h_ptr, omp_get_default_device())) { + nrn_pragma_omp(target data use_device_ptr(h_ptr)) + { d_ptr = const_cast(h_ptr); } + } #else - throw std::runtime_error( - "cnrn_target_deviceptr() not implemented without OpenACC/OpenMP and gpu build"); + if (must_be_present_or_null && h_ptr) { + throw std::runtime_error( + "cnrn_target_deviceptr() not implemented without OpenACC/OpenMP and gpu build"); + } #endif + if (must_be_present_or_null) { + cnrn_target_deviceptr_debug(file, line, typeid(T), h_ptr, d_ptr); + } else { + cnrn_target_is_present_debug(file, line, typeid(T), h_ptr, d_ptr); + } + if (error) { + throw std::runtime_error( + "cnrn_target_deviceptr() encountered an error, you may want to try setting " + "CORENEURON_GPU_DEBUG=1"); + } + return d_ptr; } template -T* cnrn_target_copyin(const T* h_ptr, std::size_t len = 1) { +T* cnrn_target_copyin(std::string_view file, int line, const T* h_ptr, std::size_t len = 1) { + T* d_ptr{}; #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) - return static_cast(acc_copyin(const_cast(h_ptr), len * sizeof(T))); + d_ptr = static_cast(acc_copyin(const_cast(h_ptr), len * sizeof(T))); #elif 
defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) nrn_pragma_omp(target enter data map(to : h_ptr[:len])) - return cnrn_target_deviceptr(h_ptr); + nrn_pragma_omp(target data use_device_ptr(h_ptr)) + { d_ptr = const_cast(h_ptr); } #else throw std::runtime_error( "cnrn_target_copyin() not implemented without OpenACC/OpenMP and gpu build"); #endif +#ifdef CORENEURON_ENABLE_PRESENT_TABLE + cnrn_target_copyin_update_present_table(h_ptr, d_ptr, len * sizeof(T)); +#endif + cnrn_target_copyin_debug(file, line, sizeof(T), typeid(T), h_ptr, len, d_ptr); + return d_ptr; } template -void cnrn_target_delete(T* h_ptr, std::size_t len = 1) { +void cnrn_target_delete(std::string_view file, int line, T* h_ptr, std::size_t len = 1) { + cnrn_target_delete_debug(file, line, sizeof(T), typeid(T), h_ptr, len); +#ifdef CORENEURON_ENABLE_PRESENT_TABLE + cnrn_target_delete_update_present_table(h_ptr, len * sizeof(T)); +#endif #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) acc_delete(h_ptr, len * sizeof(T)); @@ -74,7 +147,12 @@ void cnrn_target_delete(T* h_ptr, std::size_t len = 1) { } template -void cnrn_target_memcpy_to_device(T* d_ptr, const T* h_ptr, std::size_t len = 1) { +void cnrn_target_memcpy_to_device(std::string_view file, + int line, + T* d_ptr, + const T* h_ptr, + std::size_t len = 1) { + cnrn_target_memcpy_to_device_debug(file, line, sizeof(T), typeid(T), h_ptr, len, d_ptr); #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) acc_memcpy_to_device(d_ptr, const_cast(h_ptr), len * sizeof(T)); @@ -93,4 +171,25 @@ void cnrn_target_memcpy_to_device(T* d_ptr, const T* h_ptr, std::size_t len = 1) #endif } +template +void cnrn_target_update_on_device(std::string_view file, + int line, + const T* h_ptr, + std::size_t len = 1) { + auto* d_ptr = cnrn_target_deviceptr_or_present(file, line, true, h_ptr); + 
cnrn_target_memcpy_to_device(file, line, d_ptr, h_ptr); +} + +// Replace with std::source_location once we have C++20 +#define cnrn_target_copyin(...) cnrn_target_copyin(__FILE__, __LINE__, __VA_ARGS__) +#define cnrn_target_delete(...) cnrn_target_delete(__FILE__, __LINE__, __VA_ARGS__) +#define cnrn_target_is_present(...) \ + cnrn_target_deviceptr_or_present(__FILE__, __LINE__, false, __VA_ARGS__) +#define cnrn_target_deviceptr(...) \ + cnrn_target_deviceptr_or_present(__FILE__, __LINE__, true, __VA_ARGS__) +#define cnrn_target_memcpy_to_device(...) \ + cnrn_target_memcpy_to_device(__FILE__, __LINE__, __VA_ARGS__) +#define cnrn_target_update_on_device(...) \ + cnrn_target_update_on_device(__FILE__, __LINE__, __VA_ARGS__) + } // namespace coreneuron diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index 77ff88fb3..14e2b15df 100644 --- a/coreneuron/utils/randoms/nrnran123.cpp +++ b/coreneuron/utils/randoms/nrnran123.cpp @@ -5,21 +5,22 @@ # See top-level LICENSE file for details. # =============================================================================. */ +#include "coreneuron/gpu/nrn_acc_manager.hpp" #include "coreneuron/mpi/core/nrnmpi.hpp" #include "coreneuron/utils/memory.h" #include "coreneuron/utils/nrnmutdec.hpp" #include "coreneuron/utils/randoms/nrnran123.h" -#include -#include -#include -#include - #ifdef CORENEURON_USE_BOOST_POOL #include #include #endif +#include +#include +#include +#include + // Defining these attributes seems to help nvc++ in OpenMP target offload mode. 
#if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) && defined(__CUDACC__) @@ -76,25 +77,28 @@ using random123_allocator = coreneuron::unified_allocatorc, g_k); +#ifdef __CUDACC__ +#define g_k_qualifiers __device__ __constant__ +#else +#define g_k_qualifiers +#endif +g_k_qualifiers philox4x32_key_t g_k{}; +// Cannot refer to g_k directly from a nrn_pragma_acc(routine seq) method like +// coreneuron_random123_philox4x32_helper, and cannot have this inlined there at +// higher optimisation levels +__attribute__((noinline)) philox4x32_key_t& global_state() { + return g_k; } } // namespace +CORENRN_HOST_DEVICE philox4x32_ctr_t +coreneuron_random123_philox4x32_helper(coreneuron::nrnran123_State* s) { + return philox4x32(s->c, global_state()); +} + namespace coreneuron { std::size_t nrnran123_instance_count() { return g_instance_count; @@ -102,84 +106,13 @@ std::size_t nrnran123_instance_count() { /* if one sets the global, one should reset all the stream sequences. 
*/ uint32_t nrnran123_get_globalindex() { - return g_k.v[0]; -} - -void nrnran123_getseq(nrnran123_State* s, uint32_t* seq, char* which) { - *seq = s->c.v[0]; - *which = s->which_; -} - -void nrnran123_setseq(nrnran123_State* s, uint32_t seq, char which) { - if (which > 3) { - s->which_ = 0; - } else { - s->which_ = which; - } - s->c.v[0] = seq; - s->r = philox4x32_helper(s); -} - -void nrnran123_getids(nrnran123_State* s, uint32_t* id1, uint32_t* id2) { - *id1 = s->c.v[2]; - *id2 = s->c.v[3]; -} - -void nrnran123_getids3(nrnran123_State* s, uint32_t* id1, uint32_t* id2, uint32_t* id3) { - *id3 = s->c.v[1]; - *id1 = s->c.v[2]; - *id2 = s->c.v[3]; -} - -uint32_t nrnran123_ipick(nrnran123_State* s) { - uint32_t rval; - char which = s->which_; - rval = s->r.v[int{which++}]; - if (which > 3) { - which = 0; - s->c.v[0]++; - s->r = philox4x32_helper(s); - } - s->which_ = which; - return rval; -} - -double nrnran123_dblpick(nrnran123_State* s) { - return nrnran123_uint2dbl(nrnran123_ipick(s)); -} - -double nrnran123_negexp(nrnran123_State* s) { - /* min 2.3283064e-10 to max 22.18071 */ - return -std::log(nrnran123_dblpick(s)); -} - -/* at cost of a cached value we could compute two at a time. */ -double nrnran123_normal(nrnran123_State* s) { - double w, x, y; - double u1, u2; - - do { - u1 = nrnran123_dblpick(s); - u2 = nrnran123_dblpick(s); - u1 = 2. * u1 - 1.; - u2 = 2. * u2 - 1.; - w = (u1 * u1) + (u2 * u2); - } while (w > 1); - - y = std::sqrt((-2. * log(w)) / w); - x = u1 * y; - return x; -} - -double nrnran123_uint2dbl(uint32_t u) { - /* 0 to 2^32-1 transforms to double value in open (0,1) interval */ - /* min 2.3283064e-10 to max (1 - 2.3283064e-10) */ - return ((double) u + 1.0) * SHIFT32; + return global_state().v[0]; } /* nrn123 streams are created from cpu launcher routine */ void nrnran123_set_globalindex(uint32_t gix) { // If the global seed is changing then we shouldn't have any active streams. 
+ auto& g_k = global_state(); { std::lock_guard _{g_instance_count_mutex}; if (g_instance_count != 0 && nrnmpi_myid == 0) { @@ -190,9 +123,40 @@ void nrnran123_set_globalindex(uint32_t gix) { << g_k.v[0] << ')' << std::endl; } } - g_k.v[0] = gix; - nrn_pragma_acc(update device(g_k)) - nrn_pragma_omp(target update to(g_k)) + if (g_k.v[0] != gix) { + g_k.v[0] = gix; + if (coreneuron::gpu_enabled()) { +#ifdef __CUDACC__ + { + auto const code = cudaMemcpyToSymbol(g_k, &g_k, sizeof(g_k)); + assert(code == cudaSuccess); + } + { + auto const code = cudaDeviceSynchronize(); + assert(code == cudaSuccess); + } +#else + nrn_pragma_acc(update device(g_k)) + nrn_pragma_omp(target update to(g_k)) +#endif + } + } +} + +void nrnran123_initialise_global_state_on_device() { + if (coreneuron::gpu_enabled()) { +#ifndef __CUDACC__ + nrn_pragma_acc(enter data copyin(g_k)) +#endif + } +} + +void nrnran123_destroy_global_state_on_device() { + if (coreneuron::gpu_enabled()) { +#ifndef __CUDACC__ + nrn_pragma_acc(exit data delete (g_k)) +#endif + } } /** @brief Allocate a new Random123 stream. diff --git a/coreneuron/utils/randoms/nrnran123.h b/coreneuron/utils/randoms/nrnran123.h index ccd3fa5db..d4108612d 100644 --- a/coreneuron/utils/randoms/nrnran123.h +++ b/coreneuron/utils/randoms/nrnran123.h @@ -39,6 +39,8 @@ of the full distribution available from #include #include +#include + // Some files are compiled with DISABLE_OPENACC, and some builds have no GPU // support at all. In these two cases, request that the random123 state is // allocated using new/delete instead of CUDA unified memory. 
@@ -56,9 +58,19 @@ struct nrnran123_State { char which_; }; -struct nrnran123_array4x32 { - uint32_t v[4]; -}; +} // namespace coreneuron + +/** @brief Provide a helper function in global namespace that is declared target for OpenMP + * offloading to function correctly with NVHPC + */ +nrn_pragma_acc(routine seq) +nrn_pragma_omp(declare target) +philox4x32_ctr_t coreneuron_random123_philox4x32_helper(coreneuron::nrnran123_State* s); +nrn_pragma_omp(end declare target) + +namespace coreneuron { +void nrnran123_initialise_global_state_on_device(); +void nrnran123_destroy_global_state_on_device(); /* global index. eg. run number */ /* all generator instances share this global index */ @@ -86,38 +98,75 @@ void nrnran123_deletestream(nrnran123_State* s, bool use_unified_memory = CORENRN_RAN123_USE_UNIFIED_MEMORY); /* minimal data stream */ -nrn_pragma_omp(declare target) -nrn_pragma_acc(routine seq) -void nrnran123_getseq(nrnran123_State*, uint32_t* seq, char* which); -nrn_pragma_acc(routine seq) -void nrnran123_getids(nrnran123_State*, uint32_t* id1, uint32_t* id2); -nrn_pragma_acc(routine seq) -void nrnran123_getids3(nrnran123_State*, uint32_t* id1, uint32_t* id2, uint32_t* id3); -nrn_pragma_acc(routine seq) -uint32_t nrnran123_ipick(nrnran123_State*); /* uniform 0 to 2^32-1 */ +constexpr void nrnran123_getseq(nrnran123_State* s, uint32_t* seq, char* which) { + *seq = s->c.v[0]; + *which = s->which_; +} +constexpr void nrnran123_getids(nrnran123_State* s, uint32_t* id1, uint32_t* id2) { + *id1 = s->c.v[2]; + *id2 = s->c.v[3]; +} +constexpr void nrnran123_getids3(nrnran123_State* s, uint32_t* id1, uint32_t* id2, uint32_t* id3) { + *id3 = s->c.v[1]; + *id1 = s->c.v[2]; + *id2 = s->c.v[3]; +} -/* this could be called from openacc parallel construct */ -nrn_pragma_acc(routine seq) -double nrnran123_dblpick(nrnran123_State*); /* uniform open interval (0,1)*/ -/* nrnran123_dblpick minimum value is 2.3283064e-10 and max value is 1-min */ +// Uniform 0 to 2^32-1 +inline 
uint32_t nrnran123_ipick(nrnran123_State* s) { + char which = s->which_; + uint32_t rval{s->r.v[int{which++}]}; + if (which > 3) { + which = 0; + s->c.v[0]++; + s->r = coreneuron_random123_philox4x32_helper(s); + } + s->which_ = which; + return rval; +} + +constexpr double nrnran123_uint2dbl(uint32_t u) { + constexpr double SHIFT32 = 1.0 / 4294967297.0; /* 1/(2^32 + 1) */ + /* 0 to 2^32-1 transforms to double value in open (0,1) interval */ + /* min 2.3283064e-10 to max (1 - 2.3283064e-10) */ + return (static_cast(u) + 1.0) * SHIFT32; +} + +// Uniform open interval (0,1), minimum value is 2.3283064e-10 and max value is 1-min +inline double nrnran123_dblpick(nrnran123_State* s) { + return nrnran123_uint2dbl(nrnran123_ipick(s)); +} /* this could be called from openacc parallel construct (in INITIAL block) */ -nrn_pragma_acc(routine seq) -void nrnran123_setseq(nrnran123_State*, uint32_t seq, char which); -nrn_pragma_acc(routine seq) -double nrnran123_negexp(nrnran123_State*); /* mean 1.0 */ -/* nrnran123_negexp min value is 2.3283064e-10, max is 22.18071 */ +inline void nrnran123_setseq(nrnran123_State* s, uint32_t seq, char which) { + if (which > 3) { + s->which_ = 0; + } else { + s->which_ = which; + } + s->c.v[0] = seq; + s->r = coreneuron_random123_philox4x32_helper(s); +} -/* missing declaration in coreneuron */ -nrn_pragma_acc(routine seq) -double nrnran123_normal(nrnran123_State*); -nrn_pragma_acc(routine seq) -double nrnran123_gauss(nrnran123_State*); /* mean 0.0, std 1.0 */ +// nrnran123_negexp min value is 2.3283064e-10, max is 22.18071, mean 1.0 +inline double nrnran123_negexp(nrnran123_State* s) { + return -std::log(nrnran123_dblpick(s)); +} -/* more fundamental (stateless) (though the global index is still used) */ -nrn_pragma_acc(routine seq) -nrnran123_array4x32 nrnran123_iran(uint32_t seq, uint32_t id1, uint32_t id2); -nrn_pragma_acc(routine seq) -double nrnran123_uint2dbl(uint32_t); -nrn_pragma_omp(end declare target) +/* at cost of a cached value we 
could compute two at a time. */ +inline double nrnran123_normal(nrnran123_State* s) { + double w, u1; + do { + u1 = nrnran123_dblpick(s); + double u2{nrnran123_dblpick(s)}; + u1 = 2. * u1 - 1.; + u2 = 2. * u2 - 1.; + w = (u1 * u1) + (u2 * u2); + } while (w > 1); + double y{std::sqrt((-2. * std::log(w)) / w)}; + return u1 * y; +} + +// nrnran123_gauss, nrnran123_iran were declared but not defined in CoreNEURON +// nrnran123_array4x32 was declared but not used in CoreNEURON } // namespace coreneuron diff --git a/coreneuron/utils/units.hpp b/coreneuron/utils/units.hpp new file mode 100644 index 000000000..de44343fe --- /dev/null +++ b/coreneuron/utils/units.hpp @@ -0,0 +1,38 @@ +/* +# ============================================================================= +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL +# +# See top-level LICENSE file for details. +# ============================================================================= +*/ +#pragma once +namespace coreneuron { +namespace units { +#if CORENEURON_USE_LEGACY_UNITS == 1 +constexpr double faraday{96485.309}; +constexpr double gasconstant{8.3134}; +#else +/* NMODL translated MOD files get unit constants typically from + * share/lib/nrnunits.lib.in. But there were other source files that hardcode + * some of the constants. Here we gather a few modern units into a single place + * (but, unfortunately, also in nrnunits.lib.in). Legacy units cannot be + * gathered here because they can differ slightly from place to place. + * + * These come from https://physics.nist.gov/cuu/Constants/index.html. + * Termed the "2018 CODATA recommended values", they became available + * on 20 May 2019 and replace the 2014 CODATA set. 
+ * + * See oc/hoc_init.c, nrnoc/eion.c, nrniv/kschan.h + */ +namespace detail { +constexpr double electron_charge{1.602176634e-19}; // coulomb exact +constexpr double avogadro_number{6.02214076e+23}; // exact +constexpr double boltzmann{1.380649e-23}; // joule/K exact +} // namespace detail +constexpr double faraday{detail::electron_charge * detail::avogadro_number}; // 96485.33212... + // coulomb/mol +constexpr double gasconstant{detail::boltzmann * detail::avogadro_number}; // 8.314462618... + // joule/mol-K +#endif +} // namespace units +} // namespace coreneuron diff --git a/external/mod2c b/external/mod2c index 8565d3c17..469c74dc7 160000 --- a/external/mod2c +++ b/external/mod2c @@ -1 +1 @@ -Subproject commit 8565d3c178a195a489fae0623d6338c2e92cd1e5 +Subproject commit 469c74dc7d96bbc5a06a42696422154b4cd2ce28 diff --git a/external/nmodl b/external/nmodl index 7000ff612..4f45a1c8a 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 7000ff612208ed8b27837438731903c58d1786e3 +Subproject commit 4f45a1c8a9b99c64127ea795eb12952e754b775c diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 6601f7123..135a9e722 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -38,11 +38,6 @@ MOD_OBJS_DIR = $(OUTPUT_DIR)/corenrn/build # Linked libraries gathered by CMake LDFLAGS = $(LINKFLAGS) @CORENRN_COMMON_LDFLAGS@ -CORENRNLIB_FLAGS = -L$(CORENRN_LIB_DIR) -lcoreneuron -CORENRNLIB_FLAGS += $(if @reportinglib_LIB_DIR@, -W$(subst ;, -W,l,-rpath,@reportinglib_LIB_DIR@),) -CORENRNLIB_FLAGS += $(if @sonatareport_LIB_DIR@, -W$(subst ;, -W,l,-rpath,@sonatareport_LIB_DIR@),) -CORENRNLIB_FLAGS += $(if @caliper_LIB_DIR@, -W$(subst ;, -W,l,-rpath,@caliper_LIB_DIR@),) -CORENRNLIB_FLAGS += $(if @caliper_LIB_DIR@,-L@caliper_LIB_DIR@,) # Includes paths gathered by CMake # coreneuron/utils/randoms goes first because it needs to override the NEURON @@ -82,7 +77,7 @@ ifeq ($(wildcard 
$(CORENRN_PERLEXE)),) endif CXXFLAGS = @CORENRN_CXX_FLAGS@ -CXX_COMPILE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_CXX_COMPILE_OPTIONS_PIC@ @CORENRN_COMMON_COMPILE_DEFS@ $(INCLUDES) +CXX_COMPILE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_CXX_COMPILE_OPTIONS_PIC@ $(INCLUDES) CXX_LINK_EXE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_EXE_LINKER_FLAGS@ CXX_SHARED_LIB_CMD = $(CXX) $(CXXFLAGS) @CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS@ @CMAKE_SHARED_LIBRARY_CXX_FLAGS@ @CMAKE_SHARED_LINKER_FLAGS@ @@ -207,34 +202,39 @@ endif # main target to build binary -$(SPECIAL_EXE): coremech_lib_target +$(SPECIAL_EXE): $(corenrnmech_lib_target) @printf " => $(C_GREEN)Binary$(C_RESET) creating $(SPECIAL_EXE)\n" $(CXX_LINK_EXE_CMD) -o $(SPECIAL_EXE) $(CORENRN_SHARE_CORENRN_DIR)/coreneuron.cpp \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ - -L$(OUTPUT_DIR) -l$(COREMECH_LIB_NAME) $(CORENRNLIB_FLAGS) $(LDFLAGS) \ + -L$(OUTPUT_DIR) -l$(COREMECH_LIB_NAME) $(LDFLAGS) \ + -L$(CORENRN_LIB_DIR) \ -Wl,-rpath,'$(LIB_RPATH)' -Wl,-rpath,$(CORENRN_LIB_DIR) -Wl,-rpath,'$(INSTALL_LIB_RPATH)' -coremech_lib_target: $(corenrnmech_lib_target) - rm -rf $(OUTPUT_DIR)/.libs/lib$(COREMECH_LIB_NAME)$(LIB_SUFFIX); \ - mkdir -p $(OUTPUT_DIR)/.libs; \ - ln -s ../lib$(COREMECH_LIB_NAME)$(LIB_SUFFIX) $(OUTPUT_DIR)/.libs/lib$(COREMECH_LIB_NAME)$(LIB_SUFFIX) - $(ENGINEMECH_OBJ): $(CORENRN_SHARE_CORENRN_DIR)/enginemech.cpp | $(MOD_OBJS_DIR) $(CXX_COMPILE_CMD) -c -DADDITIONAL_MECHS $(CORENRN_SHARE_CORENRN_DIR)/enginemech.cpp -o $(ENGINEMECH_OBJ) # build shared library of mechanisms coremech_lib_shared: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always + # extract the object files from libcoreneuron-core.a + mkdir -p $(MOD_OBJS_DIR)/libcoreneuron-core + rm -f $(MOD_OBJS_DIR)/libcoreneuron-core/*.o + # --output is only supported by modern versions of ar + (cd $(MOD_OBJS_DIR)/libcoreneuron-core && ar x $(CORENRN_LIB_DIR)/libcoreneuron-core.a) $(CXX_SHARED_LIB_CMD) $(ENGINEMECH_OBJ) -o ${COREMECH_LIB_PATH} $(ALL_OBJS) \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ - $(LDFLAGS) 
$(CORENRN_LIB_DIR)/libscopmath.a \ - ${SONAME_OPTION} $(CORENRNLIB_FLAGS) -Wl,-rpath,$(CORENRN_LIB_DIR); + @CORENEURON_LINKER_START_GROUP@ \ + $(MOD_OBJS_DIR)/libcoreneuron-core/*.o @CORENEURON_LINKER_END_GROUP@ \ + $(LDFLAGS) ${SONAME_OPTION} \ + -Wl,-rpath,$(CORENRN_LIB_DIR) -L$(CORENRN_LIB_DIR) + # cleanup + rm $(MOD_OBJS_DIR)/libcoreneuron-core/*.o # build static library of mechanisms coremech_lib_static: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always - mkdir -p $(MOD_OBJS_DIR)/scopmath; \ - cd $(MOD_OBJS_DIR)/scopmath && ar -x $(CORENRN_LIB_DIR)/libscopmath.a && cd -;\ - rm -f ${COREMECH_LIB_PATH}; \ - ar cq ${COREMECH_LIB_PATH} $(ENGINEMECH_OBJ) $(ALL_OBJS) $(MOD_OBJS_DIR)/scopmath/*.o; + # make a libcorenrnmech.a by copying libcoreneuron-core.a and then appending + # the newly compiled objects + cp $(CORENRN_LIB_DIR)/libcoreneuron-core.a ${COREMECH_LIB_PATH} + ar r ${COREMECH_LIB_PATH} $(ENGINEMECH_OBJ) $(ALL_OBJS) # compile cpp files to .o $(MOD_OBJS_DIR)/%.o: $(MOD_TO_CPP_DIR)/%.cpp | $(MOD_OBJS_DIR) @@ -273,7 +273,7 @@ $(MOD_OBJS_DIR): mkdir -p $(MOD_OBJS_DIR) # install binary and libraries -install: $(SPECIAL_EXE) coremech_lib_target +install: $(SPECIAL_EXE) install -d $(DESTDIR)/bin $(DESTDIR)/lib install ${COREMECH_LIB_PATH} $(DESTDIR)/lib install $(SPECIAL_EXE) $(DESTDIR)/bin diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e9cacd422..d6b334ca3 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -27,7 +27,11 @@ find_package(Boost 1.59 QUIET COMPONENTS filesystem system atomic unit_test_fram if(Boost_FOUND) if(CORENRN_ENABLE_UNIT_TESTS) - include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) + add_library(coreneuron-unit-test INTERFACE) + target_compile_options(coreneuron-unit-test + INTERFACE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) + target_include_directories(coreneuron-unit-test SYSTEM INTERFACE ${Boost_INCLUDE_DIRS}) + target_link_libraries(coreneuron-unit-test INTERFACE coreneuron-all) 
add_subdirectory(unit/cmdline_interface) add_subdirectory(unit/interleave_info) add_subdirectory(unit/alignment) diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index 7b7e1e1a5..17b57084b 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -12,7 +12,7 @@ if(CORENRN_ENABLE_MPI_DYNAMIC) # building single generic mpi library libcorenrn_mpi. # ~~~ if(CORENEURON_AS_SUBPROJECT) - message(INFO "CoreNEURON integration tests are disabled with dynamic MPI") + message(STATUS "CoreNEURON integration tests are disabled with dynamic MPI") return() else() set(CORENRN_MPI_LIB_ARG @@ -31,6 +31,9 @@ set(PERMUTE2_ARGS "--cell-permute 2") set(CUDA_INTERFACE "--cuda-interface") if(CORENRN_ENABLE_GPU) set(GPU_ARGS "--gpu") + set(permutation_modes 1 2) +else() + set(permutation_modes 0 1) endif() # List of tests with arguments @@ -39,32 +42,37 @@ set(TEST_CASES_WITH_ARGS "ring_binqueue!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_binqueue --binqueue" "ring_multisend!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_multisend --multisend" "ring_spike_buffer!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_spike_buffer --spikebuf 1" - "ring_permute1!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_permute1 ${PERMUTE1_ARGS}" - "ring_permute2!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_permute2 ${PERMUTE2_ARGS}" "ring_gap!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap" "ring_gap_binqueue!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_binqueue --binqueue" "ring_gap_multisend!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_multisend --multisend" - "ring_gap_permute1!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_permute1 ${PERMUTE1_ARGS}" - 
"ring_gap_permute2!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_permute2 ${PERMUTE2_ARGS}" ) - -if(CORENRN_ENABLE_GPU) +set(test_suffixes "" "_binqueue" "_multisend") +foreach(cell_permute ${permutation_modes}) + list(APPEND test_suffixes "_permute${cell_permute}") list( APPEND TEST_CASES_WITH_ARGS - "ring_permute2_cudaInterface!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_permute2_cudaInterface ${PERMUTE2_ARGS} ${CUDA_INTERFACE}" - "ring_gap_permute2_cudaInterface!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_permute2_cudaInterface ${PERMUTE2_ARGS} ${CUDA_INTERFACE}" + "ring_permute${cell_permute}!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_permute${cell_permute} --cell-permute=${cell_permute}" + "ring_gap_permute${cell_permute}!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_permute${cell_permute} --cell-permute=${cell_permute}" ) -endif() + # As reports require MPI, do not add test if report is enabled. + if(NOT CORENRN_ENABLE_REPORTING) + list(APPEND test_suffixes "_serial_permute${cell_permute}") + list( + APPEND + TEST_CASES_WITH_ARGS + "ring_serial_permute${cell_permute}!${GPU_ARGS} --cell-permute=${cell_permute} --tstop 100. --celsius 6.3 --datpath ${RING_DATASET_DIR} ${MODEL_STATS_ARG} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_serial_permute${cell_permute}" + ) + endif() +endforeach() -# ~~~ -# As reports require MPI, do not add test if report is enabled. -# ~~~ -if(NOT CORENRN_ENABLE_REPORTING) +if(CORENRN_ENABLE_GPU) + list(APPEND test_suffixes "_permute2_cudaInterface") list( APPEND TEST_CASES_WITH_ARGS - "ring_serial!--tstop 100. 
--celsius 6.3 --datpath ${RING_DATASET_DIR} ${MODEL_STATS_ARG} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_serial" + "ring_permute2_cudaInterface!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_permute2_cudaInterface ${PERMUTE2_ARGS} ${CUDA_INTERFACE}" + "ring_gap_permute2_cudaInterface!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_permute2_cudaInterface ${PERMUTE2_ARGS} ${CUDA_INTERFACE}" ) endif() @@ -73,18 +81,11 @@ endif() # create them and copy reference spikes # ~~~ foreach(data_dir "ring" "ring_gap") - foreach( - test_suffix - "" - "_serial" - "_multisend" - "_binqueue" - "_savestate_permute0" - "_savestate_permute1" - "_savestate_permute2" - "_permute1" - "_permute2" - "_permute2_cudaInterface") + # Naïve foreach(test_suffix ${test_suffixes}) does not seem to handle empty suffixes correctly. + list(LENGTH test_suffixes num_suffixes) + math(EXPR num_suffixes_m1 "${num_suffixes} - 1") + foreach(suffix_index RANGE 0 ${num_suffixes_m1}) + list(GET test_suffixes ${suffix_index} test_suffix) file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/${data_dir}/out.dat.ref" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/${data_dir}${test_suffix}/") endforeach() diff --git a/tests/unit/alignment/CMakeLists.txt b/tests/unit/alignment/CMakeLists.txt index 92464350e..89da4da14 100644 --- a/tests/unit/alignment/CMakeLists.txt +++ b/tests/unit/alignment/CMakeLists.txt @@ -3,9 +3,7 @@ # # See top-level LICENSE file for details. 
# ============================================================================= -include_directories(${CMAKE_SOURCE_DIR}/coreneuron ${Boost_INCLUDE_DIRS}) - add_executable(alignment_test_bin alignment.cpp) -target_compile_options(alignment_test_bin PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) -add_test(NAME alignment_test COMMAND ${TEST_EXEC_PREFIX} $) +target_link_libraries(alignment_test_bin coreneuron-unit-test) +add_test(NAME alignment_test COMMAND $) cpp_cc_configure_sanitizers(TARGET alignment_test_bin TEST alignment_test) diff --git a/tests/unit/cmdline_interface/CMakeLists.txt b/tests/unit/cmdline_interface/CMakeLists.txt index cd177c521..cc98ad78d 100644 --- a/tests/unit/cmdline_interface/CMakeLists.txt +++ b/tests/unit/cmdline_interface/CMakeLists.txt @@ -4,14 +4,6 @@ # See top-level LICENSE file for details. # ============================================================================= add_executable(cmd_interface_test_bin test_cmdline_interface.cpp) -target_link_libraries(cmd_interface_test_bin ${MPI_CXX_LIBRARIES} coreneuron - ${corenrn_mech_library} ${reportinglib_LIBRARY} ${sonatareport_LIBRARY}) -target_include_directories(cmd_interface_test_bin SYSTEM - PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}/external/CLI11/include) -add_dependencies(cmd_interface_test_bin nrniv-core) -# Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the -# NVHPC C++ compiler handle this implicitly. 
-set_target_properties(cmd_interface_test_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) -target_compile_options(cmd_interface_test_bin PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) -add_test(NAME cmd_interface_test COMMAND ${TEST_EXEC_PREFIX} $) +target_link_libraries(cmd_interface_test_bin coreneuron-unit-test) +add_test(NAME cmd_interface_test COMMAND $) cpp_cc_configure_sanitizers(TARGET cmd_interface_test_bin TEST cmd_interface_test) diff --git a/tests/unit/cmdline_interface/test_cmdline_interface.cpp b/tests/unit/cmdline_interface/test_cmdline_interface.cpp index caef6ca14..ccd9e1f66 100644 --- a/tests/unit/cmdline_interface/test_cmdline_interface.cpp +++ b/tests/unit/cmdline_interface/test_cmdline_interface.cpp @@ -130,5 +130,5 @@ BOOST_AUTO_TEST_CASE(cmdline_interface) { // Everything has its default value, and the first `false` says not to // include default values in the output, so this should be empty - BOOST_CHECK(corenrn_param_test.app.config_to_str(false, false).empty()); + BOOST_CHECK(corenrn_param_test.config_to_str(false, false).empty()); } diff --git a/tests/unit/interleave_info/CMakeLists.txt b/tests/unit/interleave_info/CMakeLists.txt index ce69b097e..cda875eae 100644 --- a/tests/unit/interleave_info/CMakeLists.txt +++ b/tests/unit/interleave_info/CMakeLists.txt @@ -4,13 +4,6 @@ # See top-level LICENSE file for details. # ============================================================================= add_executable(interleave_info_bin check_constructors.cpp) -target_link_libraries(interleave_info_bin ${MPI_CXX_LIBRARIES} coreneuron ${corenrn_mech_library} - ${reportinglib_LIBRARY} ${sonatareport_LIBRARY}) -add_dependencies(interleave_info_bin nrniv-core) -# Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the -# NVHPC C++ compiler handle this implicitly. 
-set_target_properties(interleave_info_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) -target_compile_options(interleave_info_bin PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) -add_test(NAME interleave_info_constructor_test COMMAND ${TEST_EXEC_PREFIX} - $) +target_link_libraries(interleave_info_bin coreneuron-unit-test) +add_test(NAME interleave_info_constructor_test COMMAND $) cpp_cc_configure_sanitizers(TARGET interleave_info_bin TEST interleave_info_constructor_test) diff --git a/tests/unit/lfp/CMakeLists.txt b/tests/unit/lfp/CMakeLists.txt index 61d749aa9..34231b9f9 100644 --- a/tests/unit/lfp/CMakeLists.txt +++ b/tests/unit/lfp/CMakeLists.txt @@ -3,19 +3,9 @@ # # See top-level LICENSE file for details. # ============================================================================= - -include_directories(${CMAKE_SOURCE_DIR}/coreneuron ${Boost_INCLUDE_DIRS}) -file(GLOB lfp_test_src "*.cpp") - -add_executable(lfp_test_bin ${lfp_test_src}) -target_link_libraries(lfp_test_bin ${MPI_CXX_LIBRARIES} coreneuron ${corenrn_mech_library} - ${reportinglib_LIBRARY} ${sonatareport_LIBRARY}) -# Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the -# NVHPC C++ compiler handle this implicitly. 
-set_target_properties(lfp_test_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) -target_compile_options(lfp_test_bin PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) -add_dependencies(lfp_test_bin nrniv-core) -add_test(NAME lfp_test COMMAND ${TEST_EXEC_PREFIX} $) +add_executable(lfp_test_bin lfp.cpp) +target_link_libraries(lfp_test_bin coreneuron-unit-test) +add_test(NAME lfp_test COMMAND $) cpp_cc_configure_sanitizers(TARGET lfp_test_bin TEST lfp_test) set_property( TEST lfp_test diff --git a/tests/unit/queueing/CMakeLists.txt b/tests/unit/queueing/CMakeLists.txt index ba3725d32..05b2a12f2 100644 --- a/tests/unit/queueing/CMakeLists.txt +++ b/tests/unit/queueing/CMakeLists.txt @@ -4,12 +4,6 @@ # See top-level LICENSE file for details. # ============================================================================= add_executable(queuing_test_bin test_queueing.cpp) -target_link_libraries(queuing_test_bin ${Boost_SYSTEM_LIBRARY} coreneuron ${corenrn_mech_library} - ${reportinglib_LIBRARY} ${sonatareport_LIBRARY}) -add_dependencies(queuing_test_bin nrniv-core) -# Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the -# NVHPC C++ compiler handle this implicitly. 
-set_target_properties(queuing_test_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) -target_compile_options(queuing_test_bin PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) -add_test(NAME queuing_test COMMAND ${TEST_EXEC_PREFIX} $) +target_link_libraries(queuing_test_bin coreneuron-unit-test) +add_test(NAME queuing_test COMMAND $) cpp_cc_configure_sanitizers(TARGET queuing_test_bin TEST queuing_test) diff --git a/tests/unit/solver/CMakeLists.txt b/tests/unit/solver/CMakeLists.txt index 1d01ea4b1..f8bc52287 100644 --- a/tests/unit/solver/CMakeLists.txt +++ b/tests/unit/solver/CMakeLists.txt @@ -1,19 +1,9 @@ # ============================================================================= -# Copyright (C) 2022 Blue Brain Project +# Copyright (c) 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= - -include_directories(${CMAKE_SOURCE_DIR}/coreneuron ${Boost_INCLUDE_DIRS}) add_executable(test-solver test_solver.cpp) -target_link_libraries(test-solver coreneuron ${corenrn_mech_library}) -target_include_directories(test-solver SYSTEM - PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}/external/CLI11/include) - -# Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the -# NVHPC C++ compiler handle this implicitly. 
-set_target_properties(test-solver PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) -target_compile_options(test-solver PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) -add_dependencies(test-solver nrniv-core) +target_link_libraries(test-solver coreneuron-unit-test) add_test(NAME test-solver COMMAND $) cpp_cc_configure_sanitizers(TARGET test-solver TEST test-solver) diff --git a/tests/unit/solver/test_solver.cpp b/tests/unit/solver/test_solver.cpp index 6511f03e1..c1021bcb7 100644 --- a/tests/unit/solver/test_solver.cpp +++ b/tests/unit/solver/test_solver.cpp @@ -196,7 +196,9 @@ struct SetupThreads { } ~SetupThreads() { - delete_nrnthreads_on_device(nrn_threads, nrn_nthread); + if (corenrn_param.gpu) { + delete_nrnthreads_on_device(nrn_threads, nrn_nthread); + } for (auto& nt: *this) { free_memory(std::exchange(nt._data, nullptr)); delete[] std::exchange(nt._permute, nullptr); From 8290df28aead01ee058df4ecc8d5726f91582dca Mon Sep 17 00:00:00 2001 From: Jorge Blanco Alonso Date: Wed, 27 Oct 2021 16:07:58 +0200 Subject: [PATCH 4/9] Read lfp factors from neurodamus --- coreneuron/io/nrn_filehandler.hpp | 8 +++++++- coreneuron/io/nrn_setup.cpp | 2 +- coreneuron/io/nrnsection_mapping.hpp | 17 +++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/coreneuron/io/nrn_filehandler.hpp b/coreneuron/io/nrn_filehandler.hpp index 65cab4b97..26d960ef0 100644 --- a/coreneuron/io/nrn_filehandler.hpp +++ b/coreneuron/io/nrn_filehandler.hpp @@ -14,6 +14,7 @@ #include #include "coreneuron/utils/nrn_assert.h" +#include "coreneuron/io/nrnsection_mapping.hpp" namespace coreneuron { /** Encapsulate low-level reading of coreneuron input data files. 
@@ -110,7 +111,7 @@ class FileHandler { * Read count no of mappings for section to segment */ template - int read_mapping_info(T* mapinfo) { + int read_mapping_info(T* mapinfo, NrnThreadMappingInfo* ntmapping) { int nsec, nseg, n_scan; char line_buf[max_line_length], name[max_line_length]; @@ -123,14 +124,19 @@ class FileHandler { if (nseg) { std::vector sec, seg; + std::vector lfp_factors; sec.reserve(nseg); seg.reserve(nseg); + lfp_factors.reserve(nseg); read_array(&sec[0], nseg); read_array(&seg[0], nseg); + read_array(&lfp_factors[0], nseg); for (int i = 0; i < nseg; i++) { mapinfo->add_segment(sec[i], seg[i]); + ntmapping->add_segment_id(seg[i]); + ntmapping->add_segment_lfp_factor(seg[i], lfp_factors[i]); } } return nseg; diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp index 703e853d8..e24f1d424 100644 --- a/coreneuron/io/nrn_setup.cpp +++ b/coreneuron/io/nrn_setup.cpp @@ -959,7 +959,7 @@ void read_phase3(NrnThread& nt, UserParams& userParams) { // read section-segment mapping for every section list for (int j = 0; j < nseclist; j++) { SecMapping* smap = new SecMapping(); - F.read_mapping_info(smap); + F.read_mapping_info(smap, ntmapping); cmap->add_sec_map(smap); } diff --git a/coreneuron/io/nrnsection_mapping.hpp b/coreneuron/io/nrnsection_mapping.hpp index d7524fc42..9756fca8d 100644 --- a/coreneuron/io/nrnsection_mapping.hpp +++ b/coreneuron/io/nrnsection_mapping.hpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace coreneuron { @@ -153,6 +154,12 @@ struct NrnThreadMappingInfo { /** list of cells mapping */ std::vector mappingvec; + /** list of segment ids */ + std::vector segment_ids; + + /** map containing segment ids an its respective lfp factors */ + std::unordered_map lfp_factors; + /** @brief number of cells */ size_t size() const { return mappingvec.size(); @@ -181,5 +188,15 @@ struct NrnThreadMappingInfo { void add_cell_mapping(CellMapping* c) { mappingvec.push_back(c); } + + /** @brief add a new segment */ + 
void add_segment_id(const int segment_id) { + segment_ids.push_back(segment_id); + } + + /** @brief add the lfp factor of a segment_id */ + void add_segment_lfp_factor(const int segment_id, double factor) { + lfp_factors.insert({segment_id, factor}); + } }; } // namespace coreneuron From 99d94963fb6e610aac1369b52a9d6d3ab9325448 Mon Sep 17 00:00:00 2001 From: Jorge Blanco Alonso Date: Fri, 5 Nov 2021 17:32:51 +0100 Subject: [PATCH 5/9] Add lfp report type --- coreneuron/io/nrn_filehandler.hpp | 3 ++ coreneuron/io/nrnsection_mapping.hpp | 2 ++ coreneuron/io/reports/nrnreport.hpp | 3 +- .../reports/report_configuration_parser.cpp | 3 ++ coreneuron/io/reports/report_event.cpp | 21 +++++++++++ coreneuron/io/reports/report_event.hpp | 1 + coreneuron/io/reports/report_handler.cpp | 36 +++++++++++++++++++ coreneuron/io/reports/report_handler.hpp | 3 ++ .../io/reports/sonata_report_handler.cpp | 1 + 9 files changed, 72 insertions(+), 1 deletion(-) diff --git a/coreneuron/io/nrn_filehandler.hpp b/coreneuron/io/nrn_filehandler.hpp index 26d960ef0..ad3ed2eca 100644 --- a/coreneuron/io/nrn_filehandler.hpp +++ b/coreneuron/io/nrn_filehandler.hpp @@ -133,6 +133,9 @@ class FileHandler { read_array(&seg[0], nseg); read_array(&lfp_factors[0], nseg); + std::cout << "=====> NEW CoreNEURON!" 
<< std::endl; + std::cout << "nseg = " << nseg << std::endl; + for (int i = 0; i < nseg; i++) { mapinfo->add_segment(sec[i], seg[i]); ntmapping->add_segment_id(seg[i]); diff --git a/coreneuron/io/nrnsection_mapping.hpp b/coreneuron/io/nrnsection_mapping.hpp index 9756fca8d..29836a24b 100644 --- a/coreneuron/io/nrnsection_mapping.hpp +++ b/coreneuron/io/nrnsection_mapping.hpp @@ -160,6 +160,8 @@ struct NrnThreadMappingInfo { /** map containing segment ids an its respective lfp factors */ std::unordered_map lfp_factors; + std::vector _lfp; + /** @brief number of cells */ size_t size() const { return mappingvec.size(); diff --git a/coreneuron/io/reports/nrnreport.hpp b/coreneuron/io/reports/nrnreport.hpp index 7cdc05806..ac85d75be 100644 --- a/coreneuron/io/reports/nrnreport.hpp +++ b/coreneuron/io/reports/nrnreport.hpp @@ -76,7 +76,8 @@ enum ReportType { SynapseReport, IMembraneReport, SectionReport, - SummationReport + SummationReport, + LFPReport }; // enumerate that defines the section type for a Section report diff --git a/coreneuron/io/reports/report_configuration_parser.cpp b/coreneuron/io/reports/report_configuration_parser.cpp index 6c69662b7..528c269a6 100644 --- a/coreneuron/io/reports/report_configuration_parser.cpp +++ b/coreneuron/io/reports/report_configuration_parser.cpp @@ -138,6 +138,9 @@ std::vector create_report_configurations(const std::string& report_type = SynapseReport; } else if (report.type_str == "summation") { report_type = SummationReport; + } else if (report.type_str == "lfp") { + nrn_use_fast_imem = true; + report_type = LFPReport; } else { std::cerr << "Report error: unsupported type " << report.type_str << std::endl; nrn_abort(1); diff --git a/coreneuron/io/reports/report_event.cpp b/coreneuron/io/reports/report_event.cpp index 78eb6b268..139fc88b0 100644 --- a/coreneuron/io/reports/report_event.cpp +++ b/coreneuron/io/reports/report_event.cpp @@ -10,6 +10,7 @@ #include "coreneuron/sim/multicore.hpp" #include 
"coreneuron/io/reports/nrnreport.hpp" #include "coreneuron/utils/nrn_assert.h" +#include "coreneuron/io/nrnsection_mapping.hpp" #ifdef ENABLE_BIN_REPORTS #include "reportinglib/Records.h" #endif // ENABLE_BIN_REPORTS @@ -72,12 +73,32 @@ void ReportEvent::summation_alu(NrnThread* nt) { } } +void ReportEvent::lfp_calc(NrnThread* nt) { + // Calculate lfp only on reporting steps + if (step > 0 && (static_cast(step) % reporting_period) == 0) { + auto* mapinfo = static_cast(nt->mapping); + double sum = 0.0; + double* fast_imem_rhs = nt->nrn_fast_imem->nrn_sav_rhs; + for (const auto& kv: mapinfo->lfp_factors) { + int segment_id = kv.first; + double factor = kv.second; + if(std::isnan(factor)) { + factor = 0.0; + } + std::cout << segment_id << " - " << factor << std::endl; + sum += fast_imem_rhs[segment_id] * factor; + } + mapinfo->_lfp[0] = sum; + }mdi +} + /** on deliver, call ReportingLib and setup next event */ void ReportEvent::deliver(double t, NetCvode* nc, NrnThread* nt) { /* reportinglib is not thread safe */ #pragma omp critical { summation_alu(nt); + lfp_calc(nt); // each thread needs to know its own step #ifdef ENABLE_BIN_REPORTS records_nrec(step, gids_to_report.size(), gids_to_report.data(), report_path.data()); diff --git a/coreneuron/io/reports/report_event.hpp b/coreneuron/io/reports/report_event.hpp index 20d325614..cf2d7bd19 100644 --- a/coreneuron/io/reports/report_event.hpp +++ b/coreneuron/io/reports/report_event.hpp @@ -42,6 +42,7 @@ class ReportEvent: public DiscreteEvent { void deliver(double t, NetCvode* nc, NrnThread* nt) override; bool require_checkpoint() override; void summation_alu(NrnThread* nt); + void lfp_calc(NrnThread* nt); private: double dt; diff --git a/coreneuron/io/reports/report_handler.cpp b/coreneuron/io/reports/report_handler.cpp index 84341e7a4..fd385cd68 100644 --- a/coreneuron/io/reports/report_handler.cpp +++ b/coreneuron/io/reports/report_handler.cpp @@ -35,6 +35,7 @@ void ReportHandler::create_report(double dt, double 
tstop, double delay) { continue; } const std::vector& nodes_to_gid = map_gids(nt); + auto* mapinfo = static_cast(nt.mapping); VarsToReport vars_to_report; bool is_soma_target; switch (m_report_config.type) { @@ -58,6 +59,13 @@ void ReportHandler::create_report(double dt, double tstop, double delay) { nodes_to_gid); register_custom_report(nt, m_report_config, vars_to_report); break; + case LFPReport: + mapinfo->_lfp.resize(12); + vars_to_report = get_lfp_vars_to_report(nt, m_report_config, mapinfo->_lfp.data()); + is_soma_target = m_report_config.section_type == SectionType::Soma || + m_report_config.section_type == SectionType::Cell; + register_section_report(nt, m_report_config, vars_to_report, is_soma_target); + break; default: vars_to_report = get_synapse_vars_to_report(nt, m_report_config, nodes_to_gid); register_custom_report(nt, m_report_config, vars_to_report); @@ -341,6 +349,34 @@ VarsToReport ReportHandler::get_synapse_vars_to_report( return vars_to_report; } +VarsToReport ReportHandler::get_lfp_vars_to_report(const NrnThread& nt, + ReportConfiguration& report, + double* report_variable) const { + VarsToReport vars_to_report; + /*const auto* mapinfo = static_cast(nt.mapping); + if (!mapinfo) { + std::cerr << "[LFP] Error : mapping information is missing for a Cell group " + << nt.ncell << '\n'; + nrn_abort(1); + }*/ + for (int i = 0; i < nt.ncell; i++) { + int gid = nt.presyns[i].gid_; + if (report.target.find(gid) == report.target.end()) { + continue; + } + + std::vector to_report; + // Add all electrodes to the first gid for now + std::vector electrode_ids = {0}; + for (const auto& electrode_id : electrode_ids) { + double* variable = report_variable + electrode_id; + to_report.push_back(VarWithMapping(electrode_id, variable)); + } + vars_to_report[gid] = to_report; + } + return vars_to_report; +} + // map GIDs of every compartment, it consist in a backward sweep then forward sweep algorithm std::vector ReportHandler::map_gids(const NrnThread& nt) const { 
std::vector nodes_gid(nt.end, -1); diff --git a/coreneuron/io/reports/report_handler.hpp b/coreneuron/io/reports/report_handler.hpp index 084886c96..db62bf993 100644 --- a/coreneuron/io/reports/report_handler.hpp +++ b/coreneuron/io/reports/report_handler.hpp @@ -44,6 +44,9 @@ class ReportHandler { VarsToReport get_synapse_vars_to_report(const NrnThread& nt, ReportConfiguration& report, const std::vector& nodes_to_gids) const; + VarsToReport get_lfp_vars_to_report(const NrnThread& nt, + ReportConfiguration& report, + double* report_variable) const; std::vector map_gids(const NrnThread& nt) const; #endif // defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) protected: diff --git a/coreneuron/io/reports/sonata_report_handler.cpp b/coreneuron/io/reports/sonata_report_handler.cpp index 99f5709d5..bc38ef27b 100644 --- a/coreneuron/io/reports/sonata_report_handler.cpp +++ b/coreneuron/io/reports/sonata_report_handler.cpp @@ -57,6 +57,7 @@ std::pair SonataReportHandler::get_population_info(int gid) { void SonataReportHandler::register_report(const NrnThread& nt, ReportConfiguration& config, const VarsToReport& vars_to_report) { + std::cout << "Registering report " << config.output_path.data() << std::endl; sonata_create_report(config.output_path.data(), config.start, config.stop, From c1b29ccab43b2cfc3b06b3bacabf19501bbfb280 Mon Sep 17 00:00:00 2001 From: Jorge Blanco Alonso Date: Fri, 3 Dec 2021 11:52:28 +0100 Subject: [PATCH 6/9] Calculate 1 lfp value per gid/timestep --- coreneuron/io/nrn_filehandler.hpp | 7 ++--- coreneuron/io/nrn_setup.cpp | 2 +- coreneuron/io/nrnsection_mapping.hpp | 16 +++++----- coreneuron/io/reports/report_event.cpp | 38 ++++++++++++++++-------- coreneuron/io/reports/report_event.hpp | 4 ++- coreneuron/io/reports/report_handler.cpp | 20 ++++--------- 6 files changed, 45 insertions(+), 42 deletions(-) diff --git a/coreneuron/io/nrn_filehandler.hpp b/coreneuron/io/nrn_filehandler.hpp index ad3ed2eca..9688f9575 100644 --- 
a/coreneuron/io/nrn_filehandler.hpp +++ b/coreneuron/io/nrn_filehandler.hpp @@ -111,7 +111,7 @@ class FileHandler { * Read count no of mappings for section to segment */ template - int read_mapping_info(T* mapinfo, NrnThreadMappingInfo* ntmapping) { + int read_mapping_info(T* mapinfo, NrnThreadMappingInfo* ntmapping, CellMapping* cmap) { int nsec, nseg, n_scan; char line_buf[max_line_length], name[max_line_length]; @@ -133,13 +133,10 @@ class FileHandler { read_array(&seg[0], nseg); read_array(&lfp_factors[0], nseg); - std::cout << "=====> NEW CoreNEURON!" << std::endl; - std::cout << "nseg = " << nseg << std::endl; - for (int i = 0; i < nseg; i++) { mapinfo->add_segment(sec[i], seg[i]); ntmapping->add_segment_id(seg[i]); - ntmapping->add_segment_lfp_factor(seg[i], lfp_factors[i]); + cmap->add_segment_lfp_factor(seg[i], lfp_factors[i]); } } return nseg; diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp index e24f1d424..19d5b5297 100644 --- a/coreneuron/io/nrn_setup.cpp +++ b/coreneuron/io/nrn_setup.cpp @@ -959,7 +959,7 @@ void read_phase3(NrnThread& nt, UserParams& userParams) { // read section-segment mapping for every section list for (int j = 0; j < nseclist; j++) { SecMapping* smap = new SecMapping(); - F.read_mapping_info(smap, ntmapping); + F.read_mapping_info(smap, ntmapping, cmap); cmap->add_sec_map(smap); } diff --git a/coreneuron/io/nrnsection_mapping.hpp b/coreneuron/io/nrnsection_mapping.hpp index 29836a24b..5b9c7bd57 100644 --- a/coreneuron/io/nrnsection_mapping.hpp +++ b/coreneuron/io/nrnsection_mapping.hpp @@ -74,6 +74,9 @@ struct CellMapping { /** list of section lists (like soma, axon, apic) */ std::vector secmapvec; + /** map containing segment ids an its respective lfp factors */ + std::unordered_map lfp_factors; + CellMapping(int g) : gid(g) {} @@ -138,6 +141,11 @@ struct CellMapping { return count; } + /** @brief add the lfp factor of a segment_id */ + void add_segment_lfp_factor(const int segment_id, double factor) { + 
lfp_factors.insert({segment_id, factor}); + } + ~CellMapping() { for (size_t i = 0; i < secmapvec.size(); i++) { delete secmapvec[i]; @@ -157,9 +165,6 @@ struct NrnThreadMappingInfo { /** list of segment ids */ std::vector segment_ids; - /** map containing segment ids an its respective lfp factors */ - std::unordered_map lfp_factors; - std::vector _lfp; /** @brief number of cells */ @@ -195,10 +200,5 @@ struct NrnThreadMappingInfo { void add_segment_id(const int segment_id) { segment_ids.push_back(segment_id); } - - /** @brief add the lfp factor of a segment_id */ - void add_segment_lfp_factor(const int segment_id, double factor) { - lfp_factors.insert({segment_id, factor}); - } }; } // namespace coreneuron diff --git a/coreneuron/io/reports/report_event.cpp b/coreneuron/io/reports/report_event.cpp index 139fc88b0..9deaff30c 100644 --- a/coreneuron/io/reports/report_event.cpp +++ b/coreneuron/io/reports/report_event.cpp @@ -25,11 +25,13 @@ ReportEvent::ReportEvent(double dt, double tstart, const VarsToReport& filtered_gids, const char* name, - double report_dt) + double report_dt, + ReportType type) : dt(dt) , tstart(tstart) , report_path(name) , report_dt(report_dt) + , report_type(type) , vars_to_report(filtered_gids) { nrn_assert(filtered_gids.size()); step = tstart / dt; @@ -77,19 +79,27 @@ void ReportEvent::lfp_calc(NrnThread* nt) { // Calculate lfp only on reporting steps if (step > 0 && (static_cast(step) % reporting_period) == 0) { auto* mapinfo = static_cast(nt->mapping); - double sum = 0.0; double* fast_imem_rhs = nt->nrn_fast_imem->nrn_sav_rhs; - for (const auto& kv: mapinfo->lfp_factors) { - int segment_id = kv.first; - double factor = kv.second; - if(std::isnan(factor)) { - factor = 0.0; - } - std::cout << segment_id << " - " << factor << std::endl; - sum += fast_imem_rhs[segment_id] * factor; + + for (const auto& kv: vars_to_report) { + int gid = kv.first; + const auto& to_report = kv.second; + const auto& cell_mapping = 
mapinfo->get_cell_mapping(gid); + + int count = 0; + double sum = 0.0; + for (const auto& kv: cell_mapping->lfp_factors) { + int segment_id = kv.first; + double factor = kv.second; + if(std::isnan(factor)) { + factor = 0.0; + } + sum += fast_imem_rhs[segment_id] * factor; + count++; + } + *(to_report.front().var_value) = sum; } - mapinfo->_lfp[0] = sum; - }mdi + } } /** on deliver, call ReportingLib and setup next event */ @@ -98,7 +108,9 @@ void ReportEvent::deliver(double t, NetCvode* nc, NrnThread* nt) { #pragma omp critical { summation_alu(nt); - lfp_calc(nt); + if (report_type == ReportType::LFPReport) { + lfp_calc(nt); + } // each thread needs to know its own step #ifdef ENABLE_BIN_REPORTS records_nrec(step, gids_to_report.size(), gids_to_report.data(), report_path.data()); diff --git a/coreneuron/io/reports/report_event.hpp b/coreneuron/io/reports/report_event.hpp index cf2d7bd19..0f1a07358 100644 --- a/coreneuron/io/reports/report_event.hpp +++ b/coreneuron/io/reports/report_event.hpp @@ -36,7 +36,8 @@ class ReportEvent: public DiscreteEvent { double tstart, const VarsToReport& filtered_gids, const char* name, - double report_dt); + double report_dt, + ReportType type); /** on deliver, call ReportingLib and setup next event */ void deliver(double t, NetCvode* nc, NrnThread* nt) override; @@ -53,6 +54,7 @@ class ReportEvent: public DiscreteEvent { std::vector gids_to_report; double tstart; VarsToReport vars_to_report; + ReportType report_type; }; #endif // defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) diff --git a/coreneuron/io/reports/report_handler.cpp b/coreneuron/io/reports/report_handler.cpp index fd385cd68..1b69cd6ff 100644 --- a/coreneuron/io/reports/report_handler.cpp +++ b/coreneuron/io/reports/report_handler.cpp @@ -60,7 +60,8 @@ void ReportHandler::create_report(double dt, double tstop, double delay) { register_custom_report(nt, m_report_config, vars_to_report); break; case LFPReport: - mapinfo->_lfp.resize(12); + // 1 lfp value 
per gid + mapinfo->_lfp.resize(nt.ncell); vars_to_report = get_lfp_vars_to_report(nt, m_report_config, mapinfo->_lfp.data()); is_soma_target = m_report_config.section_type == SectionType::Soma || m_report_config.section_type == SectionType::Cell; @@ -75,7 +76,8 @@ void ReportHandler::create_report(double dt, double tstop, double delay) { t, vars_to_report, m_report_config.output_path.data(), - m_report_config.report_dt); + m_report_config.report_dt, + m_report_config.type); report_event->send(t, net_cvode_instance, &nt); m_report_events.push_back(std::move(report_event)); } @@ -353,12 +355,6 @@ VarsToReport ReportHandler::get_lfp_vars_to_report(const NrnThread& nt, ReportConfiguration& report, double* report_variable) const { VarsToReport vars_to_report; - /*const auto* mapinfo = static_cast(nt.mapping); - if (!mapinfo) { - std::cerr << "[LFP] Error : mapping information is missing for a Cell group " - << nt.ncell << '\n'; - nrn_abort(1); - }*/ for (int i = 0; i < nt.ncell; i++) { int gid = nt.presyns[i].gid_; if (report.target.find(gid) == report.target.end()) { @@ -366,12 +362,8 @@ VarsToReport ReportHandler::get_lfp_vars_to_report(const NrnThread& nt, } std::vector to_report; - // Add all electrodes to the first gid for now - std::vector electrode_ids = {0}; - for (const auto& electrode_id : electrode_ids) { - double* variable = report_variable + electrode_id; - to_report.push_back(VarWithMapping(electrode_id, variable)); - } + double* variable = report_variable + i; + to_report.push_back(VarWithMapping(i, variable)); vars_to_report[gid] = to_report; } return vars_to_report; From 7b3787e4a6cfd3584b9d7c4ffc98bd478a3ce338 Mon Sep 17 00:00:00 2001 From: Jorge Blanco Alonso Date: Thu, 6 Jan 2022 18:05:14 +0100 Subject: [PATCH 7/9] Remove cout message --- coreneuron/io/reports/sonata_report_handler.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/coreneuron/io/reports/sonata_report_handler.cpp b/coreneuron/io/reports/sonata_report_handler.cpp index 
bc38ef27b..99f5709d5 100644 --- a/coreneuron/io/reports/sonata_report_handler.cpp +++ b/coreneuron/io/reports/sonata_report_handler.cpp @@ -57,7 +57,6 @@ std::pair SonataReportHandler::get_population_info(int gid) { void SonataReportHandler::register_report(const NrnThread& nt, ReportConfiguration& config, const VarsToReport& vars_to_report) { - std::cout << "Registering report " << config.output_path.data() << std::endl; sonata_create_report(config.output_path.data(), config.start, config.stop, From be0210b26685bc272989b3981ba3a54e13641b36 Mon Sep 17 00:00:00 2001 From: Jorge Blanco Alonso Date: Tue, 5 Apr 2022 18:32:03 +0200 Subject: [PATCH 8/9] Take into account IClamp for the lfp calculation --- coreneuron/io/reports/report_event.cpp | 16 ++++++++++++---- coreneuron/io/reports/report_handler.cpp | 17 ++++++++++++++--- coreneuron/io/reports/report_handler.hpp | 3 ++- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/coreneuron/io/reports/report_event.cpp b/coreneuron/io/reports/report_event.cpp index 9deaff30c..5a943bf02 100644 --- a/coreneuron/io/reports/report_event.cpp +++ b/coreneuron/io/reports/report_event.cpp @@ -80,7 +80,7 @@ void ReportEvent::lfp_calc(NrnThread* nt) { if (step > 0 && (static_cast(step) % reporting_period) == 0) { auto* mapinfo = static_cast(nt->mapping); double* fast_imem_rhs = nt->nrn_fast_imem->nrn_sav_rhs; - + auto& summation_report = nt->summation_report_handler_->summation_reports_[report_path]; for (const auto& kv: vars_to_report) { int gid = kv.first; const auto& to_report = kv.second; @@ -94,7 +94,13 @@ void ReportEvent::lfp_calc(NrnThread* nt) { if(std::isnan(factor)) { factor = 0.0; } - sum += fast_imem_rhs[segment_id] * factor; + double iclamp = 0.0; + for (const auto& value: summation_report.currents_[segment_id]) { + double current_value = *value.first; + int scale = value.second; + iclamp += current_value * scale; + } + sum += (fast_imem_rhs[segment_id] + iclamp) * factor; count++; } 
*(to_report.front().var_value) = sum; @@ -107,8 +113,10 @@ void ReportEvent::deliver(double t, NetCvode* nc, NrnThread* nt) { /* reportinglib is not thread safe */ #pragma omp critical { - summation_alu(nt); - if (report_type == ReportType::LFPReport) { + if (report_type == ReportType::SummationReport) { + summation_alu(nt); + } + else if (report_type == ReportType::LFPReport) { lfp_calc(nt); } // each thread needs to know its own step diff --git a/coreneuron/io/reports/report_handler.cpp b/coreneuron/io/reports/report_handler.cpp index 1b69cd6ff..53efb137d 100644 --- a/coreneuron/io/reports/report_handler.cpp +++ b/coreneuron/io/reports/report_handler.cpp @@ -62,7 +62,7 @@ void ReportHandler::create_report(double dt, double tstop, double delay) { case LFPReport: // 1 lfp value per gid mapinfo->_lfp.resize(nt.ncell); - vars_to_report = get_lfp_vars_to_report(nt, m_report_config, mapinfo->_lfp.data()); + vars_to_report = get_lfp_vars_to_report(nt, m_report_config, mapinfo->_lfp.data(), nodes_to_gid); is_soma_target = m_report_config.section_type == SectionType::Soma || m_report_config.section_type == SectionType::Cell; register_section_report(nt, m_report_config, vars_to_report, is_soma_target); @@ -353,14 +353,25 @@ VarsToReport ReportHandler::get_synapse_vars_to_report( VarsToReport ReportHandler::get_lfp_vars_to_report(const NrnThread& nt, ReportConfiguration& report, - double* report_variable) const { + double* report_variable, + const std::vector& nodes_to_gids) const { + auto& summation_report = nt.summation_report_handler_->summation_reports_[report.output_path]; VarsToReport vars_to_report; for (int i = 0; i < nt.ncell; i++) { int gid = nt.presyns[i].gid_; if (report.target.find(gid) == report.target.end()) { continue; } - + // IClamp is needed for the LFP calculation + auto mech_id = nrn_get_mechtype("IClamp"); + Memb_list* ml = nt._ml_list[mech_id]; + for (int j = 0; j < ml->nodecount; j++) { + auto segment_id = ml->nodeindices[j]; + if 
((nodes_to_gids[segment_id] == gid)) { + double* var_value = get_var_location_from_var_name(mech_id, "i", ml, j); + summation_report.currents_[segment_id].push_back(std::make_pair(var_value, -1)); + } + } std::vector to_report; double* variable = report_variable + i; to_report.push_back(VarWithMapping(i, variable)); diff --git a/coreneuron/io/reports/report_handler.hpp b/coreneuron/io/reports/report_handler.hpp index db62bf993..746edbaee 100644 --- a/coreneuron/io/reports/report_handler.hpp +++ b/coreneuron/io/reports/report_handler.hpp @@ -46,7 +46,8 @@ class ReportHandler { const std::vector& nodes_to_gids) const; VarsToReport get_lfp_vars_to_report(const NrnThread& nt, ReportConfiguration& report, - double* report_variable) const; + double* report_variable, + const std::vector& nodes_to_gids) const; std::vector map_gids(const NrnThread& nt) const; #endif // defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) protected: From 54b4d266ceab6399db5a4cc652b84b23247d19c5 Mon Sep 17 00:00:00 2001 From: Jorge Blanco Alonso Date: Wed, 6 Apr 2022 11:22:18 +0200 Subject: [PATCH 9/9] Clang format and other minor changes --- coreneuron/io/reports/report_event.cpp | 29 ++++++++++++------------ coreneuron/io/reports/report_handler.cpp | 3 ++- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/coreneuron/io/reports/report_event.cpp b/coreneuron/io/reports/report_event.cpp index 5a943bf02..6189c4526 100644 --- a/coreneuron/io/reports/report_event.cpp +++ b/coreneuron/io/reports/report_event.cpp @@ -89,19 +89,19 @@ void ReportEvent::lfp_calc(NrnThread* nt) { int count = 0; double sum = 0.0; for (const auto& kv: cell_mapping->lfp_factors) { - int segment_id = kv.first; - double factor = kv.second; - if(std::isnan(factor)) { - factor = 0.0; - } - double iclamp = 0.0; - for (const auto& value: summation_report.currents_[segment_id]) { - double current_value = *value.first; - int scale = value.second; - iclamp += current_value * scale; - } - sum += 
(fast_imem_rhs[segment_id] + iclamp) * factor; - count++; + int segment_id = kv.first; + double factor = kv.second; + if (std::isnan(factor)) { + factor = 0.0; + } + double iclamp = 0.0; + for (const auto& value: summation_report.currents_[segment_id]) { + double current_value = *value.first; + int scale = value.second; + iclamp += current_value * scale; + } + sum += (fast_imem_rhs[segment_id] + iclamp) * factor; + count++; } *(to_report.front().var_value) = sum; } @@ -115,8 +115,7 @@ void ReportEvent::deliver(double t, NetCvode* nc, NrnThread* nt) { { if (report_type == ReportType::SummationReport) { summation_alu(nt); - } - else if (report_type == ReportType::LFPReport) { + } else if (report_type == ReportType::LFPReport) { lfp_calc(nt); } // each thread needs to know its own step diff --git a/coreneuron/io/reports/report_handler.cpp b/coreneuron/io/reports/report_handler.cpp index 53efb137d..e720f331e 100644 --- a/coreneuron/io/reports/report_handler.cpp +++ b/coreneuron/io/reports/report_handler.cpp @@ -62,7 +62,8 @@ void ReportHandler::create_report(double dt, double tstop, double delay) { case LFPReport: // 1 lfp value per gid mapinfo->_lfp.resize(nt.ncell); - vars_to_report = get_lfp_vars_to_report(nt, m_report_config, mapinfo->_lfp.data(), nodes_to_gid); + vars_to_report = + get_lfp_vars_to_report(nt, m_report_config, mapinfo->_lfp.data(), nodes_to_gid); is_soma_target = m_report_config.section_type == SectionType::Soma || m_report_config.section_type == SectionType::Cell; register_section_report(nt, m_report_config, vars_to_report, is_soma_target);