Extended CUDA Library (ecuda)  2.0
 All Classes Namespaces Files Functions Variables Typedefs Friends Macros
host_emulation.hpp
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015, Scott Zuyderduyn
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 
8 1. Redistributions of source code must retain the above copyright notice, this
9  list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright notice,
11  this list of conditions and the following disclaimer in the documentation
12  and/or other materials provided with the distribution.
13 
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
18 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
21 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 
25 The views and conclusions contained in the software and documentation are those
26 of the authors and should not be interpreted as representing official policies,
27 either expressed or implied, of the FreeBSD Project.
28 */
29 
30 //----------------------------------------------------------------------------
31 // impl/host_emulation.hpp
32 //
33 // Included when host/CPU-only emulation is desired.
34 //
35 // Author: Scott D. Zuyderduyn, Ph.D. (scott.zuyderduyn@utoronto.ca)
36 //----------------------------------------------------------------------------
37 
38 #pragma once
39 #ifndef ECUDA_IMPL_HOST_IMPLEMENTATION_HPP
40 #define ECUDA_IMPL_HOST_IMPLEMENTATION_HPP
41 
48 #ifndef __CUDACC__
49 
50 #include <algorithm>
51 #include <memory>
52 #include <ctime>
53 
// CUDA function/variable qualifiers. In this host-only branch each one is
// defined as an empty macro, so it disappears during preprocessing and code
// annotated with these qualifiers is seen by the compiler as plain C++.
#define __host__
#define __device__
#define __global__
#define __constant__
58 
/// Status code returned by the emulated CUDA API functions.
/// Success is the only outcome this header defines.
enum cudaError_t { cudaSuccess = 0 };
63 
/// Direction tag taken by the emulated copy functions.
///
/// NOTE(review): the numeric values below are simply the positional values of
/// this header's enumerator order; do not assume they match the values used by
/// the real CUDA runtime.
enum cudaMemcpyKind
{
    cudaMemcpyDeviceToDevice = 0,
    cudaMemcpyDeviceToHost   = 1,
    cudaMemcpyHostToDevice   = 2
};
69 
70 cudaError_t cudaFree( void* devPtr )
71 {
72  delete [] reinterpret_cast<char*>(devPtr); // TODO: does this work as expected?
73  return cudaSuccess;
74 }
75 
76 inline cudaError_t cudaFreeHost( void* devPtr ) { return cudaFree( devPtr ); }
77 
78 void cudaSetDevice( int ) {}
79 
80 cudaError_t cudaMalloc( void** devPtr, size_t size )
81 {
82  *devPtr = std::allocator<char>().allocate( size );
83  return cudaSuccess;
84 }
85 
// Flag constants named after the CUDA cudaHostAlloc options. Each non-zero
// flag occupies its own bit, so the values can be OR-ed together.
#define cudaHostAllocDefault       0x00
#define cudaHostAllocPortable      0x01
#define cudaHostAllocMapped        0x02
#define cudaHostAllocWriteCombined 0x04
90 
91 inline cudaError_t cudaHostAlloc( void** ptr, size_t size, unsigned flags = 0 ) { return cudaMalloc( ptr, size ); }
92 
93 cudaError_t cudaMallocPitch( void** devPtr, size_t* pitch, size_t width, size_t height )
94 {
95  *pitch = width;
96  *pitch += (*pitch % 16); // add padding to get 128-bit memory alignment (16 bytes)
97  if( ( width % *pitch ) == 0 ) {
98  ++(*pitch); // just add a byte to get some padding
99  //std::cerr << "WARNING: Host emulation of cudaMallocPitch allocated the equivalent to a contiguous block and so is a poor test of API logic for pitched memory." << std::endl;
100  }
101  *devPtr = std::allocator<char>().allocate( (*pitch)*height );
102  return cudaSuccess;
103 }
104 
105 cudaError_t cudaMemcpy( void* dst, const void* src, size_t count, cudaMemcpyKind )
106 {
107  std::copy( reinterpret_cast<const char*>(src), reinterpret_cast<const char*>(src)+count, reinterpret_cast<char*>(dst) );
108  return cudaSuccess;
109 }
110 
111 cudaError_t cudaMemcpy2D( void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind )
112 {
113  char* pDst = reinterpret_cast<char*>(dst);
114  const char* pSrc = reinterpret_cast<const char*>(src);
115  for( size_t i = 0; i < height; ++i, pDst += dpitch, pSrc += spitch ) std::copy( pSrc, pSrc+width, pDst );
116  return cudaSuccess;
117 }
118 
119 cudaError_t cudaMemcpyToSymbol( const char* dest, const void* src, size_t count, size_t offset = 0, cudaMemcpyKind = cudaMemcpyHostToDevice )
120 {
121  char* pDst = const_cast<char*>(dest);
122  pDst += offset;
123  const char* pSrc = reinterpret_cast<const char*>(src);
124  std::copy( pSrc, pSrc+count, pDst );
125  return cudaSuccess;
126 }
127 
128 cudaError_t cudaMemset( void* devPtr, int value, size_t count )
129 {
130  char* p = static_cast<char*>(devPtr);
131  for( size_t i = 0; i < count; ++i, ++p ) *p = static_cast<char>(value);
132  return cudaSuccess;
133 }
134 
135 cudaError_t cudaMemset2D( void* devPtr, size_t pitch, int value, size_t width, size_t height )
136 {
137  char* p = static_cast<char*>(devPtr);
138  for( std::size_t i = 0; i < height; ++i ) {
139  for( std::size_t j = 0; j < pitch; ++j, ++p ) if( j < width ) *p = static_cast<char>(value);
140  }
141  return cudaSuccess;
142 }
143 
namespace impl {

/// Record behind an emulated CUDA event handle.
struct cudaEvent
{
    std::clock_t time; ///< clock value associated with the event
};

} // namespace impl
152 
153 typedef impl::cudaEvent* cudaEvent_t;
154 
155 typedef int cudaStream_t;
156 
157 cudaError_t cudaEventCreate( cudaEvent_t* event )
158 {
159  *event = new impl::cudaEvent;
160  return cudaSuccess;
161 }
162 
163 cudaError_t cudaEventCreateWithFlags( cudaEvent_t* event, unsigned ) { return cudaEventCreate(event); }
164 
165 cudaError_t cudaEventRecord( cudaEvent_t event, cudaStream_t = 0 )
166 {
167  event->time = std::clock();
168  return cudaSuccess;
169 }
170 
171 cudaError_t cudaEventQuery( cudaEvent_t ) { return cudaSuccess; }
172 
173 cudaError_t cudaEventDestroy( cudaEvent_t event )
174 {
175  if( event ) delete event;
176  return cudaSuccess;
177 }
178 
179 cudaError_t cudaEventSynchronize( cudaEvent_t event ) { return cudaSuccess; }
180 
181 cudaError_t cudaEventElapsedTime( float* ms, cudaEvent_t start, cudaEvent_t end )
182 {
183  *ms = static_cast<double>( end->time - start->time ) / static_cast<double>(CLOCKS_PER_SEC) * static_cast<double>(1000);
184  return cudaSuccess;
185 }
186 
/// Placeholder device-properties type; the emulation defines no fields.
struct cudaDeviceProp
{
};
188 
189 cudaError_t cudaGetDeviceProperties( cudaDeviceProp*, int ) { return cudaSuccess; }
190 
191 cudaError_t cudaDriverGetVersion( int* driverVersion ) { *driverVersion = 0; return cudaSuccess; }
192 
193 cudaError_t cudaRuntimeGetVersion( int* runtimeVersion ) { *runtimeVersion = 0; return cudaSuccess; }
194 
195 cudaError_t cudaGetDeviceCount( int* count ) { *count = 0; return cudaSuccess; }
196 
197 #endif // __CUDACC__
198 
200 #endif // ECUDA_IMPL_HOST_IMPLEMENTATION_HPP
__HOST__ __DEVICE__ OutputIterator copy(InputIterator first, InputIterator last, OutputIterator result)
Replacement for std::copy.
Definition: copy.hpp:801
cudaError_t cudaMemset(char *devPtr, const char &value, const size_t count)
Re-implementation of CUDA API function cudaMemset that enforces a single-byte value.
cudaError_t cudaMemset2D(char *devPtr, const size_t pitch, const char &value, const size_t width, const size_t height)
Re-implementation of CUDA API function cudaMemset2D that enforces a single-byte value.
cudaError_t cudaMemcpyToSymbol(T *dest, const T *src, size_t count=1, size_t offset=0, enum cudaMemcpyKind kind=cudaMemcpyHostToDevice)
cudaError_t cudaMemcpy(T *dest, const T *src, const size_t count, cudaMemcpyKind kind)
Wrapper around CUDA API function cudaMemcpy.
Definition: apiwrappers.hpp:62
cudaError_t cudaMemcpy2D(T *dest, const size_t dpitch, const T *src, const size_t spitch, const size_t width, const size_t height, cudaMemcpyKind kind)
Wrapper around CUDA API function cudaMemcpy2D.
Definition: apiwrappers.hpp:84
ECUDA_SUPPRESS_HD_WARNINGS __HOST__ __DEVICE__ ecuda::iterator_traits< InputIterator >::difference_type count(InputIterator first, InputIterator last, const T &value)
Definition: count.hpp:92