Extended CUDA Library (ecuda)  2.0
 All Classes Namespaces Files Functions Variables Typedefs Friends Macros
apiwrappers.hpp
Go to the documentation of this file.
1 /*
2 Copyright (c) 2014-2016, Scott Zuyderduyn
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 
8 1. Redistributions of source code must retain the above copyright notice, this
9  list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright notice,
11  this list of conditions and the following disclaimer in the documentation
12  and/or other materials provided with the distribution.
13 
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
18 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
21 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 
25 The views and conclusions contained in the software and documentation are those
26 of the authors and should not be interpreted as representing official policies,
27 either expressed or implied, of the FreeBSD Project.
28 */
29 
30 //----------------------------------------------------------------------------
31 // apiwrappers.hpp
32 //
33 // Wrappers around CUDA C API functions.
34 //
35 // Author: Scott D. Zuyderduyn, Ph.D. (scott.zuyderduyn@utoronto.ca)
36 //----------------------------------------------------------------------------
37 
38 #pragma once
39 #ifndef ECUDA_APIWRAPPERS_HPP
40 #define ECUDA_APIWRAPPERS_HPP
41 
42 #include "global.hpp"
43 #include "allocators.hpp" // for host_allocator
44 
45 #include <vector>
46 
47 namespace ecuda {
48 
61 template<typename T>
62 inline cudaError_t cudaMemcpy( T* dest, const T* src, const size_t count, cudaMemcpyKind kind )
63 {
64  return ::cudaMemcpy( reinterpret_cast<void*>(dest), reinterpret_cast<const void*>(src), sizeof(T)*count, kind );
65 }
66 
83 template<typename T>
84 inline cudaError_t cudaMemcpy2D( T* dest, const size_t dpitch, const T* src, const size_t spitch, const size_t width, const size_t height, cudaMemcpyKind kind )
85 {
86  return ::cudaMemcpy2D( reinterpret_cast<void*>(dest), dpitch, reinterpret_cast<const void*>(src), spitch, width*sizeof(T), height, kind );
87 }
88 
90 namespace impl {
91 
101 template<typename T>
102 bool is_equal_bytes( const T& value )
103 {
104  const char* p = reinterpret_cast<const char*>(&value);
105  const char* q = p; ++q;
106  for( int i = 1; i < sizeof(T); ++i, ++q ) if( *p != *q ) return false;
107  return true;
108 }
109 
110 } // namespace impl
112 
124 inline cudaError_t cudaMemset( char* devPtr, const char& value, const size_t count )
125 {
126  return ::cudaMemset( static_cast<void*>(devPtr), static_cast<int>(value), count );
127 }
128 
146 template<typename T>
147 inline cudaError_t cudaMemset( T* devPtr, const T& value, const size_t count )
148 {
149  //TODO: may want to implement logic to limit the size of the staging memory, and do the fill in chunks if count is too large
150  if( impl::is_equal_bytes(value) ) {
151  return cudaMemset( reinterpret_cast<char*>(devPtr), *reinterpret_cast<const char*>(&value), count*sizeof(T) );
152  }
153  std::vector< T, host_allocator<T> > v( count, value );
154  return cudaMemcpy<T>( devPtr, &v.front(), count, cudaMemcpyHostToDevice );
155 }
156 
170 inline cudaError_t cudaMemset2D( char* devPtr, const size_t pitch, const char& value, const size_t width, const size_t height )
171 {
172  return ::cudaMemset2D( static_cast<void*>(devPtr), pitch, static_cast<int>(value), width, height );
173 }
174 
194 template<typename T>
195 cudaError_t cudaMemset2D( T* devPtr, const size_t pitch, const T& value, const size_t width, const size_t height )
196 {
197  if( impl::is_equal_bytes(value) ) {
198  return cudaMemset2D( reinterpret_cast<char*>(devPtr), pitch, *reinterpret_cast<const char*>(&value), width*sizeof(T), height );
199  }
200  std::vector< T, host_allocator<T> > v( width, value );
201  char* charPtr = reinterpret_cast<char*>(devPtr);
202  for( std::size_t i = 0; i < height; ++i, charPtr += pitch ) {
203  const cudaError_t rc = cudaMemcpy<T>( reinterpret_cast<T*>(charPtr), &v.front(), width, cudaMemcpyHostToDevice );
204  if( rc != cudaSuccess ) return rc;
205  }
206  return cudaSuccess;
207 }
208 
209 template<typename T>
210 inline cudaError_t cudaMemcpyToSymbol( T* dest, const T* src, size_t count=1, size_t offset=0, enum cudaMemcpyKind kind=cudaMemcpyHostToDevice )
211 {
212  return ::cudaMemcpyToSymbol( reinterpret_cast<const char*>(dest), reinterpret_cast<const void*>(src), count*sizeof(T), offset, kind );
213 }
214 
215 template<typename T>
216 inline cudaError_t cudaMemcpyToSymbol( T& dest, const T& src, enum cudaMemcpyKind kind=cudaMemcpyHostToDevice )
217 {
218  return ::ecuda::cudaMemcpyToSymbol( &dest, &src, 1, 0, kind );
219 }
220 
221 /*
222  * This is here because of a bizarre compiler bug in nvcc 5.5.
223  * If __threadfence() is called inline (i.e. the at() methods of
224  * each container), then nvcc complains about not knowing about it.
225  *
226  * Example compiler message: error: there are no arguments to ‘__threadfence’ that depend on a template parameter, so a declaration of ‘__threadfence’ must be available
227  *
228  * In CUDA >=6.0 the same code compiles fine. If we do below and just wrap the __threadfence()
229  * call in it's own function then it works in all versions.
230  */
231 inline __DEVICE__ void threadfence()
232 {
233  #ifdef __CUDACC__
234  __threadfence();
235  #endif
236 }
237 
238 } // namespace ecuda
239 
240 #endif
cudaError_t cudaMemset(char *devPtr, const char &value, const size_t count)
Re-implementation of CUDA API function cudaMemset that enforces a single-byte value.
cudaError_t cudaMemset2D(char *devPtr, const size_t pitch, const char &value, const size_t width, const size_t height)
Re-implementation of CUDA API function cudaMemset2D that enforces a single-byte value.
cudaError_t cudaMemset(T *devPtr, const T &value, const size_t count)
Re-implementation of CUDA API function cudaMemset that allows for any data type.
cudaError_t cudaMemcpyToSymbol(T *dest, const T *src, size_t count=1, size_t offset=0, enum cudaMemcpyKind kind=cudaMemcpyHostToDevice)
cudaError_t cudaMemcpy(T *dest, const T *src, const size_t count, cudaMemcpyKind kind)
Wrapper around CUDA API function cudaMemcpy.
Definition: apiwrappers.hpp:62
cudaError_t cudaMemcpy2D(T *dest, const size_t dpitch, const T *src, const size_t spitch, const size_t width, const size_t height, cudaMemcpyKind kind)
Wrapper around CUDA API function cudaMemcpy2D.
Definition: apiwrappers.hpp:84
cudaError_t cudaMemset2D(T *devPtr, const size_t pitch, const T &value, const size_t width, const size_t height)
Re-implementation of CUDA API function cudaMemset2D that allows for any data type.
__DEVICE__ void threadfence()
#define __DEVICE__
Definition: global.hpp:151
ECUDA_SUPPRESS_HD_WARNINGS __HOST__ __DEVICE__ ecuda::iterator_traits< InputIterator >::difference_type count(InputIterator first, InputIterator last, const T &value)
Definition: count.hpp:92
cudaError_t cudaMemcpyToSymbol(T &dest, const T &src, enum cudaMemcpyKind kind=cudaMemcpyHostToDevice)