Skip to content
This repository was archived by the owner on Mar 21, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
4c579e2
Bump CUB for CDP update.
alliepiper Jun 29, 2022
4182f8e
Use seq algos for CDP launches on sm90+.
alliepiper Jun 29, 2022
3c9c838
Ensure kernels are instantiated for CDPv1 nvcc device passes.
alliepiper Aug 12, 2022
f27d762
Add changelog for 1.17.1.
alliepiper Aug 15, 2022
37a8d2b
Bump CUB.
alliepiper Aug 15, 2022
1fe97c6
Add Thrust 2.0.0 changelog.
alliepiper Aug 15, 2022
d6e9274
Bump CUB.
alliepiper Aug 15, 2022
92b3aaf
Merge branch 'ctk12_cdp_updates' into staging/ctk_12.0
alliepiper Aug 16, 2022
89a5fbd
Bump version to 2.0.1 for CTK12.
alliepiper Aug 16, 2022
8fe85d8
Add sm90 option to CMake builds.
alliepiper Aug 16, 2022
d31abae
Merge branch 'ctk12_sm90_cmake_option' into staging/ctk_12.0
alliepiper Aug 16, 2022
5372a29
Bump CUB.
alliepiper Aug 17, 2022
1e6fb36
Move sync from for_each to parallel_for to fix CDP usages.
alliepiper Aug 26, 2022
512272c
Merge pull request #1776 from allisonvacanti/cdp_for_each_fix_backport
alliepiper Aug 29, 2022
f32a788
Merge remote-tracking branch 'origin/2.0.X' into staging/ctk_12.0
alliepiper Aug 29, 2022
f58f28c
Only generate per-dialect targets when needed.
Aug 30, 2022
73a4616
Fix FindTBB.cmake for MSVC 2022.
Aug 30, 2022
786e5aa
Merge pull request #1781 from allisonvacanti/c++20_fix_backport
alliepiper Aug 30, 2022
af8317f
Merge remote-tracking branch 'origin/2.0.X' into staging/ctk_12.0
alliepiper Aug 30, 2022
d205a6f
Merge pull request #1799 from miscco/no_throw
miscco Sep 26, 2022
8a2c7e5
Merge branch 'staging/ctk_12.0' into gpgpu
wmaxey Sep 27, 2022
6c518b0
WAR bug on MSVC 2022.
Aug 31, 2022
0c28b7c
Actually really for real remove throw
miscco Oct 5, 2022
cb86111
Bump CUB.
alliepiper Dec 13, 2022
a31906a
Merge branch 'staging/ctk_12.0' into main
alliepiper Dec 13, 2022
bdd6880
Limit gpuCI configurations to CTK only.
Dec 15, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Move sync from for_each to parallel_for to fix CDP usages.
The device synchronization was decoupled from `THRUST_CDP_DISPATCH`
and was trying to sync regardless of CDP state. This led to
device syncs being invoked from device code when CDP was disabled
and the thread-serial implementation was used.

Some other algorithms that are implemented with `parallel_for`
have also been updated.

Old behavior:

1. `for_each`: calls `parallel_for`
2. `parallel_for`: calls appropriate impl using `THRUST_CDP_DISPATCH`
3. `parallel_for`: returns
4. `for_each`: calls `cub::detail::device_synchronize`

New behavior:

1. `for_each`: calls `parallel_for`
2. `parallel_for`: calls appropriate impl using `THRUST_CDP_DISPATCH`
3. `parallel_for`: calls `cub::detail::device_synchronize`
4. `parallel_for`: returns
  • Loading branch information
alliepiper committed Aug 26, 2022
commit 1e6fb36168bc529c4d8938edc0480dd29cc1aaef
5 changes: 0 additions & 5 deletions thrust/system/cuda/detail/fill.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,6 @@ fill_n(execution_policy<Derived>& policy,
value),
count);

cuda_cub::throw_on_error(
cuda_cub::synchronize_optional(policy)
, "fill_n: failed to synchronize"
);

return first + count;
} // func fill_n

Expand Down
5 changes: 0 additions & 5 deletions thrust/system/cuda/detail/for_each.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,6 @@ namespace cuda_cub {
for_each_f<Input, wrapped_t>(first, wrapped_op),
count);

cuda_cub::throw_on_error(
cuda_cub::synchronize_optional(policy)
, "for_each: failed to synchronize"
);

return first + count;
}

Expand Down
4 changes: 3 additions & 1 deletion thrust/system/cuda/detail/parallel_for.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,9 @@ parallel_for(execution_policy<Derived> &policy,
THRUST_CDP_DISPATCH(
(cudaStream_t stream = cuda_cub::stream(policy);
cudaError_t status = __parallel_for::parallel_for(count, f, stream);
cuda_cub::throw_on_error(status, "parallel_for failed");),
cuda_cub::throw_on_error(status, "parallel_for failed");
status = cuda_cub::synchronize_optional(policy);
cuda_cub::throw_on_error(status, "parallel_for: failed to synchronize");),
// CDP sequential impl:
(for (Size idx = 0; idx != count; ++idx)
{
Expand Down
5 changes: 0 additions & 5 deletions thrust/system/cuda/detail/swap_ranges.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,6 @@ swap_ranges(execution_policy<Derived> &policy,
ItemsIt2>(first1, first2),
num_items);

cuda_cub::throw_on_error(
cuda_cub::synchronize_optional(policy)
, "swap_ranges: failed to synchronize"
);

return first2 + num_items;
}

Expand Down
5 changes: 0 additions & 5 deletions thrust/system/cuda/detail/tabulate.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,6 @@ tabulate(execution_policy<Derived>& policy,
cuda_cub::parallel_for(policy,
functor_t(first, tabulate_op),
count);

cuda_cub::throw_on_error(
cuda_cub::synchronize_optional(policy)
, "tabulate: failed to synchronize"
);
}

} // namespace cuda_cub
Expand Down
10 changes: 0 additions & 10 deletions thrust/system/cuda/detail/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,11 +232,6 @@ namespace __transform {
predicate),
num_items);

cuda_cub::throw_on_error(
cuda_cub::synchronize_optional(policy)
, "transform: failed to synchronize"
);

return result + num_items;
}

Expand Down Expand Up @@ -278,11 +273,6 @@ namespace __transform {
predicate),
num_items);

cuda_cub::throw_on_error(
cuda_cub::synchronize_optional(policy)
, "transform: failed to synchronize"
);

return result + num_items;
}

Expand Down
5 changes: 0 additions & 5 deletions thrust/system/cuda/detail/uninitialized_copy.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,6 @@ uninitialized_copy_n(execution_policy<Derived> &policy,
functor_t(first, result),
count);

cuda_cub::throw_on_error(
cuda_cub::synchronize_optional(policy)
, "uninitialized_copy_n: failed to synchronize"
);

return result + count;
}

Expand Down
5 changes: 0 additions & 5 deletions thrust/system/cuda/detail/uninitialized_fill.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,6 @@ uninitialized_fill_n(execution_policy<Derived>& policy,
functor_t(first, x),
count);

cuda_cub::throw_on_error(
cuda_cub::synchronize_optional(policy)
, "uninitialized_fill_n: failed to synchronize"
);

return first + count;
}

Expand Down