Skip to content
This repository was archived by the owner on Mar 21, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ See the [changelog](CHANGELOG.md) for details about specific releases.

| CUB Release | Included In |
| ------------------------- | --------------------------------------- |
| 2.0.1 | CUDA Toolkit 12.0 |
| 2.0.0 | TBD |
| 1.17.2 | TBD |
| 1.17.1 | TBD |
Expand Down
2 changes: 1 addition & 1 deletion cmake/CubCudaConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ enable_language(CUDA)
# Architecture options:
#

set(all_archs 35 37 50 52 53 60 61 62 70 72 75 80 86)
set(all_archs 35 37 50 52 53 60 61 62 70 72 75 80 86 90)
set(arch_message "CUB: Explicitly enabled compute architectures:")

# Thrust sets up the architecture flags in CMAKE_CUDA_FLAGS already. Just
Expand Down
9 changes: 9 additions & 0 deletions cub/detail/detect_cuda_runtime.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,15 @@ namespace detail

#endif // CUB_RUNTIME_FUNCTION predefined

#ifdef CUB_RDC_ENABLED
// Detect available version of CDP:
//
// Exactly one of CUB_DETAIL_CDPv1 / CUB_DETAIL_CDPv2 is defined when
// CUB_RDC_ENABLED is defined; neither is defined otherwise.
//
// - CUB_DETAIL_CDPv1 is selected when the compiler's major version
//   (__CUDACC_VER_MAJOR__) is below 12, or when the user has defined
//   CUDA_FORCE_CDP1_IF_SUPPORTED.
// - CUB_DETAIL_CDPv2 is selected in every other case.
//
// NOTE(review): an undefined __CUDACC_VER_MAJOR__ evaluates to 0 inside `#if`,
// which would select CDPv1. Presumably CUB_RDC_ENABLED is only ever defined
// when that macro is available -- confirm where CUB_RDC_ENABLED is set.
#if __CUDACC_VER_MAJOR__ < 12 || defined(CUDA_FORCE_CDP1_IF_SUPPORTED)
#define CUB_DETAIL_CDPv1
#else
#define CUB_DETAIL_CDPv2
#endif // CDP version selection
#endif // CUB_RDC_ENABLED

#endif // Do not document

} // namespace detail
Expand Down
18 changes: 13 additions & 5 deletions cub/detail/device_synchronize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,26 @@ namespace detail
CUB_EXEC_CHECK_DISABLE
CUB_RUNTIME_FUNCTION inline cudaError_t device_synchronize()
{
cudaError_t result = cudaErrorUnknown;
cudaError_t result = cudaErrorNotSupported;

#if defined(__CUDACC__) && \
((__CUDACC_VER_MAJOR__ > 11) || \
((__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ >= 6)))
// Device-side sync is only available under CDPv1:
#if defined(CUB_DETAIL_CDPv1)

#if ((__CUDACC_VER_MAJOR__ > 11) || \
((__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ >= 6)))
// CUDA >= 11.6
#define CUB_TMP_DEVICE_SYNC_IMPL \
result = __cudaDeviceSynchronizeDeprecationAvoidance();
#else // CUDA < 11.6
#else // CUDA < 11.6:
#define CUB_TMP_DEVICE_SYNC_IMPL result = cudaDeviceSynchronize();
#endif

#else // CDPv2 or no CDP:

#define CUB_TMP_DEVICE_SYNC_IMPL /* unavailable */

#endif // CDP version

NV_IF_TARGET(NV_IS_HOST,
(result = cudaDeviceSynchronize();),
(CUB_TMP_DEVICE_SYNC_IMPL));
Expand Down
47 changes: 34 additions & 13 deletions cub/util_device.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ CUB_RUNTIME_FUNCTION inline cudaError_t SmVersion(int &sm_version,
*/
CUB_RUNTIME_FUNCTION inline cudaError_t SyncStream(cudaStream_t stream)
{
cudaError_t result = cudaErrorUnknown;
cudaError_t result = cudaErrorNotSupported;

NV_IF_TARGET(NV_IS_HOST,
(result = CubDebug(cudaStreamSynchronize(stream));),
Expand Down Expand Up @@ -532,21 +532,42 @@ namespace detail
CUB_RUNTIME_FUNCTION inline cudaError_t DebugSyncStream(cudaStream_t stream)
{
  // Debug-only stream synchronization.
  //
  // - Without CUB_DETAIL_DEBUG_ENABLE_SYNC this is a no-op that returns
  //   cudaSuccess.
  // - With it, the stream is synchronized through SyncStream() wherever that
  //   is possible; where it is not, a warning is logged and cudaSuccess is
  //   returned so that callers are not failed by a skipped debug sync.
  //
  // \param stream Stream to synchronize.
  // \return cudaSuccess when the sync is skipped, otherwise the result of
  //         SyncStream(stream).
#ifndef CUB_DETAIL_DEBUG_ENABLE_SYNC

  (void)stream;
  return cudaSuccess;

#else // CUB_DETAIL_DEBUG_ENABLE_SYNC:

// Both helper macros end in a `return`, so each NV_IF_TARGET branch below
// leaves the function. They are #undef'd before the function ends.
#define CUB_TMP_SYNC_AVAILABLE                                                 \
  _CubLog("%s\n", "Synchronizing...");                                         \
  return SyncStream(stream)

#define CUB_TMP_DEVICE_SYNC_UNAVAILABLE                                        \
  (void)stream;                                                                \
  _CubLog("WARNING: Skipping CUB `debug_synchronous` synchronization (%s).\n", \
          "device-side sync requires <sm_90, RDC, and CDPv1");                 \
  return cudaSuccess

#ifdef CUB_DETAIL_CDPv1

  // Can sync everywhere but SM_90+
  NV_IF_TARGET(NV_PROVIDES_SM_90,
               (CUB_TMP_DEVICE_SYNC_UNAVAILABLE;),
               (CUB_TMP_SYNC_AVAILABLE;));

#else // CDPv2 or no CDP:

  // Can only sync on host
  NV_IF_TARGET(NV_IS_HOST,
               (CUB_TMP_SYNC_AVAILABLE;),
               (CUB_TMP_DEVICE_SYNC_UNAVAILABLE;));

#endif // CDP version

#undef CUB_TMP_DEVICE_SYNC_UNAVAILABLE
#undef CUB_TMP_SYNC_AVAILABLE

#endif // CUB_DETAIL_DEBUG_ENABLE_SYNC
}

/** \brief Gets whether the current device supports unified addressing */
Expand Down