Skip to content
This repository was archived by the owner on Mar 21, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 18 additions & 18 deletions examples/dot_products_with_zip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
#include <thrust/random.h>


// This example shows how thrust::zip_iterator can be used to create a
// 'virtual' array of structures. In this case the structure is a 3d
// vector type (Float3) whose (x,y,z) components will be stored in
// This example shows how thrust::zip_iterator can be used to create a
// 'virtual' array of structures. In this case the structure is a 3d
// vector type (Float3) whose (x,y,z) components will be stored in
// three separate float arrays. The zip_iterator "zips" these arrays
// into a single virtual Float3 array.

Expand Down Expand Up @@ -54,17 +54,17 @@ int main(void)
// We'll store the components of the 3d vectors in separate arrays. One set of
// arrays will store the 'A' vectors and another set will store the 'B' vectors.

// This 'structure of arrays' (SoA) approach is usually more efficient than the
// This 'structure of arrays' (SoA) approach is usually more efficient than the
// 'array of structures' (AoS) approach. The primary reason is that structures,
// like Float3, don't always obey the memory coalescing rules, so they are not
// efficiently transferred to and from memory. Another reason to prefer SoA to
// AoS is that we don't aways want to process all members of the structure. For
// example, if we only need to look at first element of the structure then it
// example, if we only need to look at first element of the structure then it
// is wasteful to load the entire structure from memory. With the SoA approach,
// we can chose which elements of the structure we wish to read.

thrust::device_vector<float> A0 = random_vector(N); // x components of the 'A' vectors
thrust::device_vector<float> A1 = random_vector(N); // y components of the 'A' vectors
thrust::device_vector<float> A1 = random_vector(N); // y components of the 'A' vectors
thrust::device_vector<float> A2 = random_vector(N); // z components of the 'A' vectors

thrust::device_vector<float> B0 = random_vector(N); // x components of the 'B' vectors
Expand All @@ -78,7 +78,7 @@ int main(void)
// We'll now illustrate two ways to use zip_iterator to compute the dot
// products. The first method is verbose but shows how the parts fit together.
// The second method hides these details and is more concise.


// METHOD #1
// Defining a zip_iterator type can be a little cumbersome ...
Expand All @@ -87,24 +87,24 @@ int main(void)
typedef thrust::zip_iterator<FloatIteratorTuple> Float3Iterator;

// Now we'll create some zip_iterators for A and B
Float3Iterator A_first = thrust::make_zip_iterator(make_tuple(A0.begin(), A1.begin(), A2.begin()));
Float3Iterator A_last = thrust::make_zip_iterator(make_tuple(A0.end(), A1.end(), A2.end()));
Float3Iterator B_first = thrust::make_zip_iterator(make_tuple(B0.begin(), B1.begin(), B2.begin()));
Float3Iterator A_first = thrust::make_zip_iterator(thrust::make_tuple(A0.begin(), A1.begin(), A2.begin()));
Float3Iterator A_last = thrust::make_zip_iterator(thrust::make_tuple(A0.end(), A1.end(), A2.end()));
Float3Iterator B_first = thrust::make_zip_iterator(thrust::make_tuple(B0.begin(), B1.begin(), B2.begin()));

// Finally, we pass the zip_iterators into transform() as if they
// were 'normal' iterators for a device_vector<Float3>.
thrust::transform(A_first, A_last, B_first, result.begin(), DotProduct());


// METHOD #2
// Alternatively, we can avoid creating variables for X_first, X_last,
// Alternatively, we can avoid creating variables for X_first, X_last,
// and Y_first and invoke transform() directly.
thrust::transform( thrust::make_zip_iterator(make_tuple(A0.begin(), A1.begin(), A2.begin())),
thrust::make_zip_iterator(make_tuple(A0.end(), A1.end(), A2.end())),
thrust::make_zip_iterator(make_tuple(B0.begin(), B1.begin(), B2.begin())),
thrust::transform( thrust::make_zip_iterator(thrust::make_tuple(A0.begin(), A1.begin(), A2.begin())),
thrust::make_zip_iterator(thrust::make_tuple(A0.end(), A1.end(), A2.end())),
thrust::make_zip_iterator(thrust::make_tuple(B0.begin(), B1.begin(), B2.begin())),
result.begin(),
DotProduct() );



// Finally, we'll print a few results
Expand All @@ -126,8 +126,8 @@ int main(void)
std::cout << "(" << thrust::get<0>(b) << "," << thrust::get<1>(b) << "," << thrust::get<2>(b) << ")";
std::cout << " = ";
std::cout << dot << std::endl;
}
}

return 0;
}

17 changes: 11 additions & 6 deletions thrust/detail/allocator/temporary_allocator.inl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
#include <thrust/system/detail/bad_alloc.h>
#include <cassert>

#if defined(__CUDA_ARCH__) && THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
#if (defined(__NVCOMPILER_CUDA__) || defined(__CUDA_ARCH__)) && \
THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
#include <thrust/system/cuda/detail/terminate.h>
#endif

Expand All @@ -45,11 +46,15 @@ __host__ __device__
// note that we pass cnt to deallocate, not a value derived from result.second
deallocate(result.first, cnt);

#if !defined(__CUDA_ARCH__)
throw thrust::system::detail::bad_alloc("temporary_buffer::allocate: get_temporary_buffer failed");
#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
thrust::system::cuda::detail::terminate_with_message("temporary_buffer::allocate: get_temporary_buffer failed");
#endif
if (THRUST_IS_HOST_CODE) {
#if THRUST_INCLUDE_HOST_CODE
throw thrust::system::detail::bad_alloc("temporary_buffer::allocate: get_temporary_buffer failed");
#endif
} else {
#if THRUST_INCLUDE_DEVICE_CODE
thrust::system::cuda::detail::terminate_with_message("temporary_buffer::allocate: get_temporary_buffer failed");
#endif
}
} // end if

return result.first;
Expand Down
21 changes: 19 additions & 2 deletions thrust/detail/config/cpp_compatibility.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@
// FIXME: Combine THRUST_INLINE_CONSTANT and
// THRUST_INLINE_INTEGRAL_MEMBER_CONSTANT into one macro when NVCC properly
// supports `constexpr` globals in host and device code.
#ifdef __CUDA_ARCH__
#if defined(__CUDA_ARCH__) || defined(__NVCOMPILER_CUDA__)
// FIXME: Add this when NVCC supports inline variables.
//# if THRUST_CPP_DIALECT >= 2017
//# define THRUST_INLINE_CONSTANT inline constexpr
//# define THRUST_INLINE_INTEGRAL_MEMBER_CONSTANT inline constexpr
# if THRUST_CPP_DIALECT >= 2011
# define THRUST_INLINE_CONSTANT static constexpr
# define THRUST_INLINE_CONSTANT static const __device__
# define THRUST_INLINE_INTEGRAL_MEMBER_CONSTANT static constexpr
# else
# define THRUST_INLINE_CONSTANT static const __device__
Expand All @@ -75,3 +75,20 @@
# endif
#endif

#if defined(__NVCOMPILER_CUDA__)
# define THRUST_IS_DEVICE_CODE __builtin_is_device_code()
# define THRUST_IS_HOST_CODE (!__builtin_is_device_code())
# define THRUST_INCLUDE_DEVICE_CODE 1
# define THRUST_INCLUDE_HOST_CODE 1
#elif defined(__CUDA_ARCH__)
# define THRUST_IS_DEVICE_CODE 1
# define THRUST_IS_HOST_CODE 0
# define THRUST_INCLUDE_DEVICE_CODE 1
# define THRUST_INCLUDE_HOST_CODE 0
#else
# define THRUST_IS_DEVICE_CODE 0
# define THRUST_IS_HOST_CODE 1
# define THRUST_INCLUDE_DEVICE_CODE 0
# define THRUST_INCLUDE_HOST_CODE 1
#endif

6 changes: 5 additions & 1 deletion thrust/detail/config/exec_check_disable.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@

#include <thrust/detail/config.h>

#if defined(__CUDACC__) && !(defined(__CUDA__) && defined(__clang__))
// #pragma nv_exec_check_disable is only recognized by NVCC. Having a macro
// expand to a #pragma (rather than _Pragma) only works with NVCC's compilation
// model, not with other compilers.
#if defined(__CUDACC__) && !defined(__NVCOMPILER_CUDA__) && \
!(defined(__CUDA__) && defined(__clang__))

#define __thrust_exec_check_disable__ #pragma nv_exec_check_disable

Expand Down
20 changes: 12 additions & 8 deletions thrust/detail/contiguous_storage.inl
Original file line number Diff line number Diff line change
Expand Up @@ -430,15 +430,19 @@ __host__ __device__
void contiguous_storage<T,Alloc>
::swap_allocators(false_type, Alloc &other)
{
#ifdef __CUDA_ARCH__
// allocators must be equal when swapping containers with allocators that propagate on swap
assert(!is_allocator_not_equal(other));
#else
if (is_allocator_not_equal(other))
{
throw allocator_mismatch_on_swap();
if (THRUST_IS_DEVICE_CODE) {
#if THRUST_INCLUDE_DEVICE_CODE
// allocators must be equal when swapping containers with allocators that propagate on swap
assert(!is_allocator_not_equal(other));
#endif
} else {
#if THRUST_INCLUDE_HOST_CODE
if (is_allocator_not_equal(other))
{
throw allocator_mismatch_on_swap();
}
#endif
}
#endif
thrust::swap(m_allocator, other);
} // end contiguous_storage::swap_allocators()

Expand Down
2 changes: 1 addition & 1 deletion thrust/detail/functional/actor.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ template<typename Eval>
typedef Eval eval_type;

__host__ __device__
actor(void);
THRUST_CONSTEXPR actor();

__host__ __device__
actor(const Eval &base);
Expand Down
4 changes: 2 additions & 2 deletions thrust/detail/functional/actor.inl
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ namespace functional

template<typename Eval>
__host__ __device__
actor<Eval>
::actor(void)
THRUST_CONSTEXPR actor<Eval>
::actor()
: eval_type()
{}

Expand Down
2 changes: 1 addition & 1 deletion thrust/detail/functional/argument.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ template<unsigned int i>
};

__host__ __device__
argument(void){}
THRUST_CONSTEXPR argument(){}

template<typename Env>
__host__ __device__
Expand Down
35 changes: 20 additions & 15 deletions thrust/detail/integer_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,27 @@ template <typename Integer>
__host__ __device__ __thrust_forceinline__
Integer clz(Integer x)
{
#if __CUDA_ARCH__
return ::__clz(x);
#else
int num_bits = 8 * sizeof(Integer);
int num_bits_minus_one = num_bits - 1;

for (int i = num_bits_minus_one; i >= 0; --i)
{
if ((Integer(1) << i) & x)
{
return num_bits_minus_one - i;
}
Integer result;
if (THRUST_IS_DEVICE_CODE) {
#if THRUST_INCLUDE_DEVICE_CODE
result = ::__clz(x);
#endif
} else {
#if THRUST_INCLUDE_HOST_CODE
int num_bits = 8 * sizeof(Integer);
int num_bits_minus_one = num_bits - 1;
result = num_bits;
for (int i = num_bits_minus_one; i >= 0; --i)
{
if ((Integer(1) << i) & x)
{
result = num_bits_minus_one - i;
break;
}
}
#endif
}

return num_bits;
#endif
return result;
}

template <typename Integer>
Expand Down
8 changes: 2 additions & 6 deletions thrust/detail/seq.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ struct seq_t : thrust::system::detail::sequential::execution_policy<seq_t>,
thrust::system::detail::sequential::execution_policy>
{
__host__ __device__
seq_t() : thrust::system::detail::sequential::execution_policy<seq_t>() {}
THRUST_CONSTEXPR seq_t() : thrust::system::detail::sequential::execution_policy<seq_t>() {}

// allow any execution_policy to convert to seq_t
template<typename DerivedPolicy>
Expand All @@ -45,11 +45,7 @@ struct seq_t : thrust::system::detail::sequential::execution_policy<seq_t>,
} // end detail


#ifdef __CUDA_ARCH__
static const __device__ detail::seq_t seq;
#else
static const detail::seq_t seq;
#endif
THRUST_INLINE_CONSTANT detail::seq_t seq;


} // end thrust
Expand Down
6 changes: 1 addition & 5 deletions thrust/execution_policy.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,11 +344,7 @@ static const detail::host_t host;
* \see host_execution_policy
* \see thrust::device
*/
#ifdef __CUDA_ARCH__
static const __device__ detail::device_t device;
#else
static const detail::device_t device;
#endif
THRUST_INLINE_CONSTANT detail::device_t device;


// define seq for the purpose of Doxygenating it
Expand Down
Loading