Skip to content
This repository was archived by the owner on Mar 21, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 0 additions & 15 deletions thrust/detail/reference.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,24 +141,9 @@ template<typename Element, typename Pointer, typename Derived>
__host__ __device__
inline void assign_from(OtherPointer src);

// XXX this helper exists only to avoid warnings about null references from the other assign_from
template<typename System1, typename System2, typename OtherPointer>
inline __host__ __device__
void assign_from(System1 *system1, System2 *system2, OtherPointer src);

template<typename System, typename OtherPointer>
__host__ __device__
inline void strip_const_assign_value(const System &system, OtherPointer src);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this strip_const_assign_value() function superfluous? It seems like code that is calling it should just be able to call assign_value() directly.


// XXX this helper exists only to avoid warnings about null references from the other swap
template<typename System>
inline __host__ __device__
void swap(System *system, derived_type &other);

// XXX this helper exists only to avoid warnings about null references from operator value_type ()
template<typename System>
inline __host__ __device__
value_type convert_to_value_type(System *system) const;
}; // end reference

// Output stream operator
Expand Down
70 changes: 16 additions & 54 deletions thrust/detail/reference.inl
Original file line number Diff line number Diff line change
Expand Up @@ -88,30 +88,16 @@ template<typename Element, typename Pointer, typename Derived>
} // end reference::operator=()


template<typename Element, typename Pointer, typename Derived>
template<typename System>
typename reference<Element,Pointer,Derived>::value_type
reference<Element,Pointer,Derived>
::convert_to_value_type(System *system) const
{
using thrust::system::detail::generic::select_system;
return strip_const_get_value(select_system(*system));
} // end convert_to_value_type()


__thrust_exec_check_disable__
template<typename Element, typename Pointer, typename Derived>
reference<Element,Pointer,Derived>
::operator typename reference<Element,Pointer,Derived>::value_type () const
{
typedef typename thrust::iterator_system<pointer>::type System;

// XXX avoid default-constructing a system
// XXX use null a reference for dispatching
// XXX this assumes that the eventual invocation of
// XXX get_value will not access system state
System *system = 0;

return convert_to_value_type(system);
System system;
using thrust::system::detail::generic::select_system;
return strip_const_get_value(select_system(system));
} // end reference::operator value_type ()


Expand All @@ -129,17 +115,7 @@ template<typename Element, typename Pointer, typename Derived>
} // end reference::strip_const_get_value()


template<typename Element, typename Pointer, typename Derived>
template<typename System1, typename System2, typename OtherPointer>
void reference<Element,Pointer,Derived>
::assign_from(System1 *system1, System2 *system2, OtherPointer src)
{
using thrust::system::detail::generic::select_system;

strip_const_assign_value(select_system(*system1, *system2), src);
} // end assign_from()


__thrust_exec_check_disable__
template<typename Element, typename Pointer, typename Derived>
template<typename OtherPointer>
void reference<Element,Pointer,Derived>
Expand All @@ -148,14 +124,12 @@ template<typename Element, typename Pointer, typename Derived>
typedef typename thrust::iterator_system<pointer>::type System1;
typedef typename thrust::iterator_system<OtherPointer>::type System2;

// XXX avoid default-constructing a system
// XXX use null references for dispatching
// XXX this assumes that the eventual invocation of
// XXX assign_value will not access system state
System1 *system1 = 0;
System2 *system2 = 0;
System1 system1;
System2 system2;

using thrust::system::detail::generic::select_system;

assign_from(system1, system2, src);
strip_const_assign_value(select_system(system1, system2), src);
} // end assign_from()


Expand All @@ -172,31 +146,19 @@ template<typename Element, typename Pointer, typename Derived>
} // end strip_const_assign_value()


template<typename Element, typename Pointer, typename Derived>
template<typename System>
void reference<Element,Pointer,Derived>
::swap(System *system, derived_type &other)
{
using thrust::system::detail::generic::select_system;
using thrust::system::detail::generic::iter_swap;

iter_swap(select_system(*system, *system), m_ptr, other.m_ptr);
} // end reference::swap()


__thrust_exec_check_disable__
template<typename Element, typename Pointer, typename Derived>
void reference<Element,Pointer,Derived>
::swap(derived_type &other)
{
typedef typename thrust::iterator_system<pointer>::type System;

// XXX avoid default-constructing a system
// XXX use null references for dispatching
// XXX this assumes that the eventual invocation
// XXX of iter_swap will not access system state
System *system = 0;
System system;

using thrust::system::detail::generic::select_system;
using thrust::system::detail::generic::iter_swap;

swap(system, other);
iter_swap(select_system(system, system), m_ptr, other.m_ptr);
} // end reference::swap()


Expand Down
2 changes: 1 addition & 1 deletion thrust/system/cuda/detail/execute_on_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class execute_on_stream

public:
__host__ __device__
inline execute_on_stream(cudaStream_t stream)
inline execute_on_stream(cudaStream_t stream = default_stream())
: super_t(stream)
{}
};
Expand Down
10 changes: 4 additions & 6 deletions thrust/system/cuda/detail/reduce.inl
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ OutputType tuned_reduce(execution_policy<DerivedPolicy> &exec,
bulk_::async(bulk_::par(s, g, 1), reduce_detail::reduce_partitions(), bulk_::root.this_exec, partial_sums.begin(), partial_sums.end(), partial_sums.begin(), binary_op);
} // end while

return partial_sums[0];
return get_value(exec, &partial_sums[0]);
} // end tuned_reduce()


Expand All @@ -154,7 +154,7 @@ OutputType general_reduce(execution_policy<DerivedPolicy> &exec,

cudaStream_t s = stream(thrust::detail::derived_cast(exec));

typedef thrust::detail::temporary_array<OutputType,thrust::cuda::tag> temporary_array;
typedef thrust::detail::temporary_array<OutputType,DerivedPolicy> temporary_array;

// automatically choose a number of groups and a group size
size_type num_groups = 0;
Expand All @@ -165,9 +165,7 @@ OutputType general_reduce(execution_policy<DerivedPolicy> &exec,
num_groups = thrust::min<size_type>(num_groups, thrust::detail::util::divide_ri(n, group_size));

uniform_decomposition<size_type> decomp(n, num_groups);

thrust::cuda::tag t;
temporary_array partial_sums(t, decomp.size());
temporary_array partial_sums(exec, decomp.size());

// reduce into partial sums
bulk_::async(bulk_::grid(decomp.size(), group_size, bulk_::use_default, s), reduce_partitions(), bulk_::root.this_exec, first, decomp, partial_sums.begin(), init, binary_op);
Expand All @@ -181,7 +179,7 @@ OutputType general_reduce(execution_policy<DerivedPolicy> &exec,
bulk_::async(bulk_::grid(num_groups, group_size, bulk_::use_default, s), reduce_partitions(), bulk_::root.this_exec, partial_sums.begin(), partial_sums.end(), partial_sums.begin(), binary_op);
} // end while

return partial_sums[0];
return get_value(exec, &partial_sums[0]);
} // end general_reduce()


Expand Down
4 changes: 4 additions & 0 deletions thrust/system/cuda/detail/synchronize.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,15 @@ namespace detail
inline __host__ __device__
void synchronize(const char *message = "");

inline __host__ __device__
void synchronize(cudaStream_t stream, const char *message = "");


inline __host__ __device__
void synchronize_if_enabled(const char *message = "");



} // end namespace detail
} // end namespace cuda
} // end namespace system
Expand Down
10 changes: 10 additions & 0 deletions thrust/system/cuda/detail/synchronize.inl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@ void synchronize(const char *message)
} // end synchronize()


inline __host__ __device__
void synchronize(cudaStream_t stream, const char *message)
{
#if !defined(__CUDA_ARCH__)
throw_on_error(cudaStreamSynchronize(stream), message);
#else
synchronize(message);
#endif
}

inline __host__ __device__
void synchronize_if_enabled(const char *message)
{
Expand Down
43 changes: 36 additions & 7 deletions thrust/system/cuda/detail/trivial_copy.inl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <thrust/system_error.h>
#include <thrust/system/cuda/error.h>
#include <thrust/system/cuda/detail/throw_on_error.h>
#include <thrust/system/cuda/detail/synchronize.h>
#include <thrust/iterator/iterator_traits.h>
#include <thrust/system/cpp/detail/execution_policy.h>
#include <thrust/detail/raw_pointer_cast.h>
Expand Down Expand Up @@ -51,17 +52,17 @@ inline void checked_cudaMemcpyAsync(void *dst, const void *src, size_t count, en

template<typename System1,
typename System2>
cudaMemcpyKind cuda_memcpy_kind(const thrust::cuda::execution_policy<System1> &,
const thrust::cpp::execution_policy<System2> &)
cudaMemcpyKind cuda_memcpy_kind(const thrust::cuda::execution_policy<System1> &,
const thrust::cpp::execution_policy<System2> &)
{
return cudaMemcpyDeviceToHost;
} // end cuda_memcpy_kind()


template<typename System1,
typename System2>
cudaMemcpyKind cuda_memcpy_kind(const thrust::cpp::execution_policy<System1> &,
const thrust::cuda::execution_policy<System2> &)
cudaMemcpyKind cuda_memcpy_kind(const thrust::cpp::execution_policy<System1> &,
const thrust::cuda::execution_policy<System2> &)
{
return cudaMemcpyHostToDevice;
} // end cuda_memcpy_kind()
Expand All @@ -81,6 +82,32 @@ cudaMemcpyKind cuda_memcpy_kind(const thrust::cuda::execution_policy<System> &,
#endif
} // end cuda_memcpy_kind()

template<typename System1,
typename System2>
cudaStream_t cuda_memcpy_stream(const thrust::cuda::execution_policy<System1> &exec,
const thrust::cpp::execution_policy<System2> &)
{
return stream(derived_cast(exec));
} // end cuda_memcpy_stream()

template<typename System1,
typename System2>
cudaStream_t cuda_memcpy_stream(const thrust::cpp::execution_policy<System1> &,
const thrust::cuda::execution_policy<System2> &exec)
{
return stream(derived_cast(exec));
} // end cuda_memcpy_stream()


template<typename System>
cudaStream_t cuda_memcpy_stream(const thrust::cuda::execution_policy<System> &,
const thrust::cuda::execution_policy<System> &exec)
{
return stream(derived_cast(exec));
} // end cuda_memcpy_stream()





} // end namespace trivial_copy_detail
Expand Down Expand Up @@ -132,9 +159,11 @@ void trivial_copy_n(cross_system<System1,System2> &systems,

cudaMemcpyKind kind = trivial_copy_detail::cuda_memcpy_kind(thrust::detail::derived_cast(systems.system1), thrust::detail::derived_cast(systems.system2));

// XXX use the globally-blocking legacy stream for now
// we may wish to enable async host <-> device copy in the future
trivial_copy_detail::checked_cudaMemcpyAsync(dst, src, n * sizeof(T), kind, legacy_stream());

// async host <-> device copy , but synchronize on a user provided stream
cudaStream_t s = trivial_copy_detail::cuda_memcpy_stream(derived_cast(systems.system1), derived_cast(systems.system2));
trivial_copy_detail::checked_cudaMemcpyAsync(dst, src, n * sizeof(T), kind, s);
synchronize(s, "failed synchronize in thrust::system::cuda::detail::trivial_copy_n");
}


Expand Down