Extended CUDA Library (ecuda)  2.0
 All Classes Namespaces Files Functions Variables Typedefs Friends Macros
fill.hpp
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015, Scott Zuyderduyn
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 
8 1. Redistributions of source code must retain the above copyright notice, this
9  list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright notice,
11  this list of conditions and the following disclaimer in the documentation
12  and/or other materials provided with the distribution.
13 
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
18 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
21 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 
25 The views and conclusions contained in the software and documentation are those
26 of the authors and should not be interpreted as representing official policies,
27 either expressed or implied, of the FreeBSD Project.
28 */
29 
30 //----------------------------------------------------------------------------
31 // algo/fill.hpp
32 //
33 // Extension of std::fill that recognizes device memory and can be called from
34 // host or device code.
35 //
36 // Author: Scott D. Zuyderduyn, Ph.D. (scott.zuyderduyn@utoronto.ca)
37 //----------------------------------------------------------------------------
38 
39 #pragma once
40 #ifndef ECUDA_ALGO_FILL_HPP
41 #define ECUDA_ALGO_FILL_HPP
42 
43 #include <iterator>
44 #include <vector>
45 
46 #include "../global.hpp"
47 #include "../iterator.hpp"
48 //#include "../utility.hpp"
49 
50 namespace ecuda {
51 
52 // forward declaration
53 template<class ForwardIterator,typename T> __HOST__ __DEVICE__ inline void fill( ForwardIterator first, ForwardIterator last, const T& val );
54 
56 namespace impl {
57 
58 namespace fill_device {
59 
60 template<class ForwardIterator,typename T>
61 __HOST__ __DEVICE__ inline void fill(
62  ForwardIterator first, ForwardIterator last,
63  const T& val
64 )
65 {
66  #ifdef __CUDA_ARCH__
67  while( first != last ) { *first = val; ++first; }
68  #else
69  typedef typename ecuda::iterator_traits<ForwardIterator>::is_contiguous iterator_contiguity;
70  {
71  const bool isContiguous = ecuda::is_same<iterator_contiguity,ecuda::true_type>::value;
72  ECUDA_STATIC_ASSERT(isContiguous,CANNOT_FILL_RANGE_REPRESENTED_BY_NONCONTIGUOUS_DEVICE_ITERATOR);
73  }
75  typedef typename ecuda::iterator_traits<ForwardIterator>::value_type value_type;
76  CUDA_CALL( cudaMemset<value_type>( first.operator->(), val, static_cast<std::size_t>(n) ) );
77  #endif
78 }
79 
80 template<typename T,typename P>
81 __HOST__ __DEVICE__ inline void fill(
82  device_contiguous_block_iterator<T,P> first, device_contiguous_block_iterator<T,P> last,
83  const T& val
84 )
85 {
86  #ifdef __CUDA_ARCH__
87  while( first != last ) { *first = val; ++first; }
88  #else
89  typedef device_contiguous_block_iterator<T,P> input_iterator_type;
91  if( first.get_offset() ) {
92  const int leading = first.get_width() - first.get_offset();
93  ::ecuda::fill( first.contiguous_begin(), first.contiguous_begin()+leading, val );
94  first += leading;
95  n -= leading;
96  }
97  const int rows = n / first.get_width();
98  typedef typename ecuda::iterator_traits< device_contiguous_block_iterator<T,P> >::value_type value_type;
99  typedef typename ecuda::add_pointer<value_type>::type pointer_type;
100  CUDA_CALL( cudaMemset2D<value_type>( naked_cast<pointer_type>(first.operator->()), first.operator->().get_pitch(), val, first.get_width(), rows ) );
101  n -= rows * first.get_width();
102  if( n ) ::ecuda::fill( first.contiguous_begin(), first.contiguous_begin()+n, val );
103  #endif
104 }
105 
106 } // namespace fill_device
107 
109 template<class ForwardIterator,typename T>
110 __HOST__ __DEVICE__ inline void fill(
111  ForwardIterator first, ForwardIterator last,
112  const T& val,
113  ecuda::true_type // device memory
114 )
115 {
116  #ifdef __CUDA_ARCH__
117  while( first != last ) { *first = val; ++first; }
118  #else
119  typedef typename ecuda::iterator_traits<ForwardIterator>::is_contiguous iterator_contiguity;
120  typedef typename ecuda::iterator_traits<ForwardIterator>::iterator_category iterator_category;
121  {
122  const bool isSomeKindOfContiguous =
123  ecuda::is_same<iterator_contiguity,ecuda::true_type>::value ||
124  ecuda::is_same<iterator_category,device_contiguous_block_iterator_tag>::value;
125  ECUDA_STATIC_ASSERT(isSomeKindOfContiguous,CANNOT_FILL_RANGE_REPRESENTED_BY_NONCONTIGUOUS_DEVICE_ITERATOR);
126  }
127  if( ecuda::is_same<typename ecuda::iterator_traits<ForwardIterator>::value_type,T>::value ) {
128  fill_device::fill( first, last, val );
129  } else {
130  typedef typename ecuda::iterator_traits<ForwardIterator>::value_type value_type;
131  const value_type val2( val );
132  fill_device::fill( first, last, val2 );
133  }
134  #endif
135 }
136 
137 template<class ForwardIterator,typename T>
138 __HOST__ __DEVICE__ inline void fill(
139  ForwardIterator first, ForwardIterator last,
140  const T& val,
141  ecuda::false_type // host memory
142 )
143 {
144  #ifdef __CUDA_ARCH__
145  // never called from device code
146  #else
147  std::fill( first, last, val );
148  #endif
149 }
150 
151 } // namespace impl
153 
155 template<class ForwardIterator,typename T>
156 __HOST__ __DEVICE__ inline void fill( ForwardIterator first, ForwardIterator last, const T& val )
157 {
158  impl::fill(
159  first, last,
160  val,
162  );
163 }
164 
165 } // namespace ecuda
166 
167 #endif
base_type::iterator_category iterator_category
Definition: iterator.hpp:437
#define CUDA_CALL(x)
Definition: global.hpp:83
#define __HOST__
Definition: global.hpp:150
ecuda::false_type is_device_iterator
Definition: iterator.hpp:441
#define ECUDA_STATIC_ASSERT(x, msg)
Definition: global.hpp:191
base_type::difference_type difference_type
Definition: iterator.hpp:436
#define __DEVICE__
Definition: global.hpp:151
#define ECUDA_SUPPRESS_HD_WARNINGS
Definition: global.hpp:58
ECUDA_SUPPRESS_HD_WARNINGS __HOST__ __DEVICE__ void fill(ForwardIterator first, ForwardIterator last, const T &val)
Definition: fill.hpp:156
ECUDA_SUPPRESS_HD_WARNINGS __HOST__ __DEVICE__ std::iterator_traits< Iterator >::difference_type distance(const Iterator &first, const Iterator &last)
Definition: iterator.hpp:627
base_type::value_type value_type
Definition: iterator.hpp:440
__HOST__ __DEVICE__ void fill(ForwardIterator first, ForwardIterator last, const T &val)
Definition: fill.hpp:156