|
typedef base_type::value_type | value_type |
| cell data type More...
|
|
typedef Alloc | allocator_type |
| allocator type More...
|
|
typedef base_type::size_type | size_type |
| unsigned integral type More...
|
|
typedef base_type::difference_type | difference_type |
| signed integral type More...
|
|
typedef base_type::reference | reference |
| cell reference type More...
|
|
typedef base_type::const_reference | const_reference |
| cell const reference type More...
|
|
typedef base_type::pointer | pointer |
| cell pointer type More...
|
|
typedef make_const< pointer >::type | const_pointer |
| cell const pointer type More...
|
|
typedef model::device_sequence
< value_type,
striding_padded_ptr
< value_type, typename
ecuda::add_pointer< value_type >
::type > > | row_type |
| cube row container type More...
|
|
typedef model::device_sequence
< value_type,
striding_padded_ptr
< value_type, typename
ecuda::add_pointer< value_type >
::type > > | column_type |
| cube column container type More...
|
|
typedef
model::device_contiguous_sequence
< value_type > | depth_type |
| cube depth container type More...
|
|
typedef model::device_sequence
< const value_type,
striding_padded_ptr< const
value_type, typename
ecuda::add_pointer< const
value_type >::type > > | const_row_type |
| cube const row container type More...
|
|
typedef model::device_sequence
< const value_type,
striding_padded_ptr< const
value_type, typename
ecuda::add_pointer< const
value_type >::type > > | const_column_type |
| cube const column container type More...
|
|
typedef
model::device_contiguous_sequence
< const value_type > | const_depth_type |
| cube const depth container type More...
|
|
typedef base_type::iterator | iterator |
| iterator type More...
|
|
typedef base_type::const_iterator | const_iterator |
| const iterator type More...
|
|
typedef base_type::reverse_iterator | reverse_iterator |
| reverse iterator type More...
|
|
typedef
base_type::const_reverse_iterator | const_reverse_iterator |
| const reverse iterator type More...
|
|
typedef model::device_matrix
< value_type,
striding_padded_ptr
< value_type, typename
ecuda::add_pointer< value_type >
::type > > | slice_xy_type |
| xy section of a cube at a fixed depth More...
|
|
typedef
model::device_contiguous_row_matrix
< value_type, typename
ecuda::add_pointer< value_type >
::type > | slice_xz_type |
| xz section of a cube at a fixed column More...
|
|
typedef
model::device_contiguous_row_matrix
< value_type, typename
ecuda::add_pointer< value_type >
::type > | slice_yz_type |
| yz section of a cube at a fixed row More...
|
|
typedef model::device_matrix
< const value_type,
striding_padded_ptr< const
value_type, typename
ecuda::add_pointer< const
value_type >::type > > | const_slice_xy_type |
| const xy section of a cube at a fixed depth More...
|
|
typedef
model::device_contiguous_row_matrix
< const value_type, typename
ecuda::add_pointer< const
value_type >::type > | const_slice_xz_type |
| const xz section of a cube at a fixed column More...
|
|
typedef
model::device_contiguous_row_matrix
< const value_type, typename
ecuda::add_pointer< const
value_type >::type > | const_slice_yz_type |
| const yz section of a cube at a fixed row More...
|
|
typedef
impl::cube_kernel_argument< T,
Alloc > | kernel_argument |
| kernel argument type More...
|
|
typedef const
impl::cube_kernel_argument< T,
Alloc > | const_kernel_argument |
| const kernel argument type More...
|
|
|
__HOST__ | cube (const size_type numberRows=0, const size_type numberColumns=0, const size_type numberDepths=0, const value_type &value=value_type(), const Alloc &allocator=Alloc()) |
| Constructs a cube with dimensions numberRows x numberColumns x numberDepths filled with copies of elements with value value. More...
|
|
__HOST__ | cube (const cube &src) |
| Copy constructor. More...
|
|
__HOST__ | cube (const cube &src, const allocator_type &alloc) |
| Copy constructor. More...
|
|
__HOST__ cube & | operator= (const cube &src) |
|
__HOST__ allocator_type | get_allocator () const |
| Returns the allocator associated with the container. More...
|
|
__HOST__ __DEVICE__ size_type | number_rows () const __NOEXCEPT__ |
| Returns the number of rows in the container. More...
|
|
__HOST__ __DEVICE__ size_type | number_columns () const __NOEXCEPT__ |
| Returns the number of columns in the container. More...
|
|
__HOST__ __DEVICE__ size_type | number_depths () const __NOEXCEPT__ |
| Returns the number of depths in the container. More...
|
|
__HOST__ __DEVICE__ size_type | size () const __NOEXCEPT__ |
| Returns the number of elements in the container. More...
|
|
__HOST__ __DEVICE__ bool | empty () const __NOEXCEPT__ |
| Checks if the container has no elements. More...
|
|
__HOST__ __DEVICE__ pointer | data () __NOEXCEPT__ |
| Returns pointer to the underlying 2D memory serving as element storage. More...
|
|
__HOST__ __DEVICE__ const_pointer | data () const __NOEXCEPT__ |
| Returns pointer to the underlying 2D memory serving as element storage. More...
|
|
__HOST__ __DEVICE__ iterator | begin () __NOEXCEPT__ |
| Returns an iterator to the first element of the container. More...
|
|
__HOST__ __DEVICE__ iterator | end () __NOEXCEPT__ |
| Returns an iterator to the element following the last element of the container. More...
|
|
__HOST__ __DEVICE__ const_iterator | begin () const __NOEXCEPT__ |
| Returns an iterator to the first element of the container. More...
|
|
__HOST__ __DEVICE__ const_iterator | end () const __NOEXCEPT__ |
| Returns an iterator to the element following the last element of the container. More...
|
|
__HOST__ __DEVICE__
reverse_iterator | rbegin () __NOEXCEPT__ |
| Returns a reverse iterator to the first element of the reversed container. More...
|
|
__HOST__ __DEVICE__
reverse_iterator | rend () __NOEXCEPT__ |
| Returns a reverse iterator to the element following the last element of the reversed container. More...
|
|
__HOST__ __DEVICE__
const_reverse_iterator | rbegin () const __NOEXCEPT__ |
| Returns a reverse iterator to the first element of the reversed container. More...
|
|
__HOST__ __DEVICE__
const_reverse_iterator | rend () const __NOEXCEPT__ |
| Returns a reverse iterator to the element following the last element of the reversed container. More...
|
|
__HOST__ __DEVICE__ row_type | get_row (const size_type columnIndex, const size_type depthIndex) |
| Gets a view of the sequence of elements forming a single row. More...
|
|
__HOST__ __DEVICE__ column_type | get_column (const size_type rowIndex, const size_type depthIndex) |
| Gets a view of the sequence of elements forming a single column. More...
|
|
__HOST__ __DEVICE__ depth_type | get_depth (const size_type rowIndex, const size_type columnIndex) |
| Gets a view of the sequence of elements forming a single depth. More...
|
|
__HOST__ __DEVICE__ const_row_type | get_row (const size_type columnIndex, const size_type depthIndex) const |
| Gets a view of the sequence of elements forming a single row. More...
|
|
__HOST__ __DEVICE__
const_column_type | get_column (const size_type rowIndex, const size_type depthIndex) const |
| Gets a view of the sequence of elements forming a single column. More...
|
|
__HOST__ __DEVICE__
const_depth_type | get_depth (const size_type rowIndex, const size_type columnIndex) const |
| Gets a view of the sequence of elements forming a single depth. More...
|
|
__HOST__ __DEVICE__ slice_yz_type | get_yz (const size_type rowIndex) |
| Gets a view of the matrix of elements at a single row. More...
|
|
__HOST__ __DEVICE__ slice_xy_type | get_xy (const size_type depthIndex) |
| Gets a view of the matrix of elements at a single depth. More...
|
|
__HOST__ __DEVICE__ slice_xz_type | get_xz (const size_type columnIndex) |
| Gets a view of the matrix of elements at a single column. More...
|
|
__HOST__ __DEVICE__
const_slice_yz_type | get_yz (const size_type rowIndex) const |
| Gets a view of the matrix of elements at a single row. More...
|
|
__HOST__ __DEVICE__
const_slice_xy_type | get_xy (const size_type depthIndex) const |
| Gets a view of the matrix of elements at a single depth. More...
|
|
__HOST__ __DEVICE__
const_slice_xz_type | get_xz (const size_type columnIndex) const |
| Gets a view of the matrix of elements at a single column. More...
|
|
__DEVICE__ reference | at (size_type rowIndex, size_type columnIndex, size_type depthIndex) |
| Returns a reference to the element at specified row, column, and depth index, with bounds checking. More...
|
|
__DEVICE__ const_reference | at (size_type rowIndex, size_type columnIndex, size_type depthIndex) const |
| Returns a constant reference to the element at specified row, column, and depth index, with bounds checking. More...
|
|
__DEVICE__ reference | operator() (const size_type rowIndex, const size_type columnIndex, const size_type depthIndex) |
| Returns a reference to the element at specified location index. No bounds checking is performed. More...
|
|
__DEVICE__ const_reference | operator() (const size_type rowIndex, const size_type columnIndex, const size_type depthIndex) const |
| Returns a reference to the element at specified location index. No bounds checking is performed. More...
|
|
__HOST__ __DEVICE__ slice_yz_type | operator[] (const size_type rowIndex) |
| operator[](rowIndex) alias for get_yz(rowIndex) More...
|
|
__HOST__ __DEVICE__
const_slice_yz_type | operator[] (const size_type rowIndex) const |
| operator[](rowIndex) alias for get_yz(rowIndex) More...
|
|
__HOST__ void | resize (const size_type newNumberRows, const size_type newNumberColumns, const size_type newNumberDepths, const value_type &value=value_type()) |
| Resizes the container to have dimensions newNumberRows x newNumberColumns x newNumberDepths. More...
|
|
__HOST__ __DEVICE__ void | fill (const value_type &value) |
| Assigns a given value to all elements in the container. More...
|
|
template<typename T, class Alloc = device_pitch_allocator<T>, class P = shared_ptr<T>>
class ecuda::cube< T, Alloc, P >
A resizable cube stored in device memory.
A cube is defined as a 3D structure of dimensions rows*columns*depths. The default implementation uses pitched memory where a 2D block of video memory is allocated with width=depths and height=rows*columns. Pitched memory is aligned in a device-dependent manner so that calls to individual elements can be threaded more efficiently (i.e. minimizing the number of read operations required to supply data to multiple threads). Consult the CUDA API documentation for a more verbose explanation.
Methods are prefaced with appropriate keywords to declare them as host and/or device capable. In general: operations requiring memory allocation/deallocation are host only, operations to access the values of specific elements are device only, and copy operations on ranges of data and accessors of general information can be performed on both the host and device.
Memory use can be conceptualized as:
              |- depths -|
              |---- pitch ----|
    _     _   +----------+----+
   |     |    |          |xxxx|
   |  columns |          |xxxx| x = allocated but not used, just padding to
   |     |_   |          |xxxx|     enforce an efficient memory alignment
  rows        |          |xxxx|
   |          |          |xxxx|
   |          |          |xxxx|
   |_         +----------+----+
As a result, it is highly desirable for threading to utilize a depth-wise orientation. For example, a good kernel to perform an operation on the elements of a cube might be:
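template<typename T>
__global__ void doCubeOperation( typename ecuda::cube<T>::kernel_argument cube )
{
  const int dep = blockIdx.x*blockDim.x+threadIdx.x; // each thread gets a different depth value
  const int row = blockIdx.y;
  const int col = blockIdx.z;
  if( row < cube.number_rows() && col < cube.number_columns() && dep < cube.number_depths() ) {
    T& value =
    cube(row,col,dep);
    // ... do work on value
  }
}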
This could be called from host code like:
dim3 grid( 1, 10, 20 ), block( 1000, 1, 1 );
doCubeOperation<<<grid,block>>>(
cube );
Unfortunately, CUDA solutions are very problem specific, so there is no generally applicable example for specifying how thread blocks should be defined. The size of the cube, hardware limitations, CUDA API limitations, etc. all play a part. For example, the above implementation won't work on older devices where blockDim.x was limited to 512 threads per block (at the time of this writing the limit is 1024 on newer devices).
Just keep in mind that the depth dimension lies in contiguous memory, the column dimension is contiguous blocks of depth blocks, and the row dimension is contiguous blocks of column blocks; thus, an implementation that aims to have concurrently running threads accessing depth >>> column > row will run much more efficiently.
Definition at line 129 of file cube.hpp.