39 #ifndef ECUDA_ALGO_COPY_HPP
40 #define ECUDA_ALGO_COPY_HPP
45 #include "../global.hpp"
46 #include "../algorithm.hpp"
47 #include "../allocators.hpp"
48 #include "../apiwrappers.hpp"
49 #include "../iterator.hpp"
50 #include "../utility.hpp"
// Tag aliases built on the library's boolean constant types:
// host_type is ecuda::false_type, device_type is ecuda::true_type.
// NOTE(review): presumably these mirror an iterator's is_device_iterator
// trait and are passed as tag arguments to pick between the host and device
// copy overloads in this header — the call sites are not visible here, so
// confirm before relying on that.
59 typedef ecuda::false_type host_type;
60 typedef ecuda::true_type device_type;
// Forward declaration of the three-argument copy( first, last, result ).
// It is marked __HOST__ __DEVICE__ and inline, takes the input range as the
// iterator pair [first,last) plus an output iterator, and returns an
// OutputIterator. The project's reference text describes this function as a
// "Replacement for std::copy".
// NOTE(review): the declaration presumably appears this early so that helper
// code later in the header can call copy() before its definition — confirm
// against the full file.
89 template<
class InputIterator,
class OutputIterator>
__HOST__ __DEVICE__ inline OutputIterator
copy( InputIterator first, InputIterator last, OutputIterator result );
94 template<
class Iterator>
97 get_iterator_pointer( Iterator& iter )
99 return iter.operator->();
104 typename ecuda::add_pointer<T>::type
105 get_iterator_pointer( T* ptr )
130 template<
class InputIterator,
class OutputIterator>
134 OutputIterator result,
139 namespace device_to_device {
148 template<
class InputIterator,
class OutputIterator>
150 InputIterator first, InputIterator last,
151 OutputIterator result,
156 while( first != last ) { *result = *first; ++first; ++result; }
159 typedef typename std::iterator_traits<OutputIterator>::value_type value_type;
160 typename std::iterator_traits<InputIterator>::difference_type n =
ecuda::distance( first, last );
161 CUDA_CALL( cudaMemcpy<value_type>( result.operator->(), first.operator->(),
static_cast<std::size_t
>(n), cudaMemcpyDeviceToDevice ) );
174 template<
class OutputIterator,
typename T,
typename P>
177 OutputIterator result,
182 while( first != last ) { *result = *first; ++first; ++result; }
193 typename input_iterator_type::contiguous_iterator first2 = first.
contiguous_begin();
201 typedef typename ecuda::add_pointer<value_type>::type pointer;
202 pointer dest = naked_cast<pointer>( result.operator->() );
203 typedef typename ecuda::add_pointer<const value_type>::type const_pointer;
204 const_pointer src = naked_cast<const_pointer>( first.operator->() );
206 const size_t pitch = first.operator->().get_pitch();
207 const std::size_t width = first.
get_width();
209 CUDA_CALL( cudaMemcpy2D<value_type>( dest, width*
sizeof(value_type), src, pitch, width, rows, cudaMemcpyDeviceToDevice ) );
219 typename input_iterator_type::contiguous_iterator first2 = first.
contiguous_begin();
235 template<
class InputIterator,
typename T,
typename P>
237 InputIterator first, InputIterator last,
243 while( first != last ) { *result = *first; ++first; ++result; }
252 const std::size_t leading = result.operator->().get_remaining_width();
253 if( leading < result.operator->().get_width() ) {
262 typedef typename ecuda::add_pointer<value_type>::type pointer;
263 pointer dest = naked_cast<pointer>( result.operator->() );
264 typedef typename ecuda::add_pointer<const value_type>::type const_pointer;
265 const_pointer src = naked_cast<const_pointer>( first.operator->() );
267 const size_t pitch = result.operator->().get_pitch();
268 const std::size_t width = result.operator->().
get_width();
271 CUDA_CALL( cudaMemcpy2D<value_type>( dest, pitch, src, width*
sizeof(value_type), width, rows, cudaMemcpyDeviceToDevice ) );
297 template<
typename T,
typename P,
typename U,
typename Q>
305 while( first != last ) { *result = *first; ++first; ++result; }
328 typedef typename ecuda::add_pointer<value_type>::type pointer;
329 pointer dest = naked_cast<pointer>( result.operator->() );
330 typedef typename ecuda::add_pointer<const value_type>::type const_pointer;
331 const_pointer src = naked_cast<const_pointer>( first.operator->() );
332 const size_t src_pitch = first.operator->().get_pitch();
333 const size_t dest_pitch = result.operator->().get_pitch();
334 const std::size_t width = result.
get_width();
337 CUDA_CALL( cudaMemcpy2D<value_type>( dest, dest_pitch, src, src_pitch, width, rows, cudaMemcpyDeviceToDevice ) );
360 typename input_iterator_type::difference_type n =
ecuda::distance( first, last );
363 const std::size_t copy_width = width > n ? n : width;
364 typename input_iterator_type::contiguous_iterator first2 = first.
contiguous_begin();
365 typename output_iterator_type::contiguous_iterator result2 = result.
contiguous_begin();
368 result += copy_width;
381 template<
class InputIterator,
class OutputIterator>
385 OutputIterator result,
390 while( first != last ) { *result = *first; ++first; ++result; }
398 const bool isSomeKindOfContiguous =
399 ecuda::is_same<input_contiguity,ecuda::true_type>::value ||
400 ecuda::is_same<input_iterator_category,device_contiguous_block_iterator_tag>::value;
401 ECUDA_STATIC_ASSERT(isSomeKindOfContiguous,CANNOT_USE_NONCONTIGUOUS_DEVICE_ITERATOR_AS_SOURCE_FOR_COPY);
408 const bool isSomeKindOfContiguous =
409 ecuda::is_same<output_contiguity,ecuda::true_type>::value ||
410 ecuda::is_same<output_iterator_category,device_contiguous_block_iterator_tag>::value;
411 ECUDA_STATIC_ASSERT(isSomeKindOfContiguous,CANNOT_USE_NONCONTIGUOUS_DEVICE_ITERATOR_AS_DESTINATION_FOR_COPY);
419 const bool isSameType = ecuda::is_same<typename ecuda::remove_const<T>::type,
typename ecuda::remove_const<U>::type>::value;
423 std::vector< U, host_allocator<U> > v2( v1.size() );
449 template<
class InputIterator,
class OutputIterator>
451 InputIterator first, InputIterator last,
452 OutputIterator result,
457 namespace host_to_device {
467 template<
class InputIterator,
class OutputIterator>
471 OutputIterator result,
480 typedef typename ecuda::add_pointer<value_type>::type pointer;
481 pointer dest = naked_cast<pointer>( impl::get_iterator_pointer(result) );
482 typedef typename ecuda::add_pointer<const value_type>::type const_pointer;
483 const_pointer src = naked_cast<const_pointer>( impl::get_iterator_pointer(first) );
484 CUDA_CALL( cudaMemcpy<value_type>( dest, src, static_cast<std::size_t>(n), cudaMemcpyHostToDevice ) );
497 template<
class InputIterator,
typename T,
typename P>
524 typedef typename ecuda::add_pointer<value_type>::type pointer;
525 pointer dest = naked_cast<pointer>( result.operator->() );
526 typedef typename ecuda::add_pointer<const value_type>::type const_pointer;
527 const_pointer src = naked_cast<const_pointer>( get_iterator_pointer(first) );
529 const size_t pitch = result.operator->().get_pitch();
530 const std::size_t width = result.
get_width();
532 CUDA_CALL( cudaMemcpy2D<value_type>( dest, pitch, src, width*
sizeof(value_type), width, rows, cudaMemcpyHostToDevice ) );
557 template<
class InputIterator,
class OutputIterator>
559 InputIterator first, InputIterator last,
560 OutputIterator result,
571 const bool isSomeKindOfContiguous =
572 ecuda::is_same<typename ecuda::iterator_traits<OutputIterator>::is_contiguous,ecuda::true_type>::value ||
574 ECUDA_STATIC_ASSERT(isSomeKindOfContiguous,CANNOT_USE_NONCONTIGUOUS_DEVICE_ITERATOR_AS_DESTINATION_FOR_COPY);
581 const typename std::iterator_traits<InputIterator>::pointer pStart = impl::get_iterator_pointer(first);
582 const typename std::iterator_traits<InputIterator>::pointer pEnd = impl::get_iterator_pointer(last);
584 std::vector< U, host_allocator<U> > v( first, last );
590 const bool isSameType = ecuda::is_same<T,U>::value;
592 std::vector< U, host_allocator<U> > v( first, last );
604 namespace device_to_host {
613 template<
class InputIterator,
class OutputIterator>
617 OutputIterator result,
625 typedef typename ecuda::add_pointer<const value_type>::type src_pointer_type;
626 typedef typename ecuda::add_pointer<value_type>::type dest_pointer_type;
627 src_pointer_type src = naked_cast<src_pointer_type>( impl::get_iterator_pointer(first) );
628 dest_pointer_type dest = naked_cast<dest_pointer_type>( impl::get_iterator_pointer(result) );
630 CUDA_CALL( cudaMemcpy<value_type>( dest, src, static_cast<std::size_t>(n), cudaMemcpyDeviceToHost ) );
643 template<
typename T,
typename P,
class OutputIterator>
647 OutputIterator result,
661 typename input_iterator_type::contiguous_iterator first2 = first.
contiguous_begin();
669 typedef typename ecuda::add_pointer<value_type>::type pointer;
670 pointer dest = get_iterator_pointer( result );
671 typedef typename ecuda::add_pointer<const value_type>::type const_pointer;
672 const_pointer src = naked_cast<const_pointer>( first.operator->() );
674 const size_t pitch = first.operator->().get_pitch();
675 const std::size_t width = first.
get_width();
678 CUDA_CALL( cudaMemcpy2D<value_type>( dest, width*
sizeof(value_type), src, pitch, width, rows, cudaMemcpyDeviceToHost ) );
688 typename input_iterator_type::contiguous_iterator first2 = first.
contiguous_begin();
712 template<
class InputIterator,
class OutputIterator>
716 OutputIterator result,
726 const bool isSomeKindOfContiguous =
727 ecuda::is_same<typename ecuda::iterator_traits<InputIterator>::is_contiguous,ecuda::true_type>::value ||
729 ECUDA_STATIC_ASSERT(isSomeKindOfContiguous,CANNOT_USE_NONCONTIGUOUS_DEVICE_ITERATOR_AS_SOURCE_FOR_COPY);
737 typedef const char* raw_pointer_type;
738 raw_pointer_type pStart = naked_cast<raw_pointer_type>( impl::get_iterator_pointer(result) );
739 OutputIterator result2 = result;
741 raw_pointer_type pEnd = naked_cast<raw_pointer_type>( impl::get_iterator_pointer(result2) );
743 typedef typename ecuda::remove_const<U>::type U2;
744 std::vector< U2, host_allocator<U2> > v( n );
753 const bool isSameType = ecuda::is_same<T,U>::value;
755 typedef typename ecuda::remove_const<T>::type T2;
756 std::vector< T2, host_allocator<T2> > v(
ecuda::distance( first, last ) );
777 template<
class InputIterator,
class OutputIterator>
781 OutputIterator result,
800 template<
class InputIterator,
class OutputIterator>
__HOST__ __DEVICE__ OutputIterator copy(InputIterator first, InputIterator last, OutputIterator result)
Replacement for std::copy.
ECUDA_SUPPRESS_HD_WARNINGS __HOST__ __DEVICE__ void advance(InputIterator &iterator, Distance n)
Increments given iterator by n elements.
Iterator category denoting device memory that is made of contiguous blocks (but the blocks themselves...
base_type::iterator_category iterator_category
Iterator category denoting contiguous device memory.
__HOST__ __DEVICE__ std::size_t get_width() const __NOEXCEPT__
ECUDA_SUPPRESS_HD_WARNINGS __HOST__ __DEVICE__ OutputIterator copy(InputIterator first, InputIterator last, OutputIterator result)
Replacement for std::copy.
__HOST__ __DEVICE__ contiguous_iterator contiguous_begin() const __NOEXCEPT__
ecuda::false_type is_device_iterator
Allocator for page-locked host memory.
__HOST__ __DEVICE__ const T & min(const T &a, const T &b)
Couples together a pair of values.
#define ECUDA_STATIC_ASSERT(x, msg)
base_type::pointer pointer
base_type::difference_type difference_type
#define ECUDA_SUPPRESS_HD_WARNINGS
__HOST__ __DEVICE__ std::size_t get_offset() const __NOEXCEPT__
ECUDA_SUPPRESS_HD_WARNINGS __HOST__ __DEVICE__ std::iterator_traits< Iterator >::difference_type distance(const Iterator &first, const Iterator &last)
base_type::value_type value_type