45   using data_store = std::variant<non_owning_buffer<device_type::cpu, T>,
 
   /** Default constructor: value-initializes device_, data_ and size_, and sets cached_ptr to nullptr. */
   50   buffer() : device_{}, data_{}, size_{}, cached_ptr{nullptr} {}
 
   57     : device_{[mem_type, &
device]() {
 
   65       data_{[
this, mem_type, size, stream]() {
 
   66         auto result = data_store{};
 
   68           case device_type::cpu: result = owning_buffer<device_type::cpu, T>{size}; 
break;
 
   70             result = owning_buffer<device_type::gpu, T>{std::get<1>(device_), size, stream};
 
   77         auto result = 
static_cast<T*
>(
nullptr);
 
   78         switch (data_.index()) {
 
   79           case 0: result = std::get<0>(data_).get(); 
break;
 
   80           case 1: result = std::get<1>(data_).get(); 
break;
 
   81           case 2: result = std::get<2>(data_).get(); 
break;
 
   82           case 3: result = std::get<3>(data_).get(); 
break;
 
   91     : device_{[mem_type, &device]() {
 
   99       data_{[input_data, mem_type]() {
 
  100         auto result = data_store{};
 
  102           case device_type::cpu: result = non_owning_buffer<device_type::cpu, T>{input_data}; 
break;
 
  103           case device_type::gpu: result = non_owning_buffer<device_type::gpu, T>{input_data}; 
break;
 
  108       cached_ptr{[
this]() {
 
  109         auto result = 
static_cast<T*
>(
nullptr);
 
  110         switch (data_.index()) {
 
  111           case 0: result = std::get<0>(data_).get(); 
break;
 
  112           case 1: result = std::get<1>(data_).get(); 
break;
 
  113           case 2: result = std::get<2>(data_).get(); 
break;
 
  114           case 3: result = std::get<3>(data_).get(); 
break;
 
  131     : device_{[mem_type, &device]() {
 
  134           case device_type::cpu: result = device_id<device_type::cpu>{device}; 
break;
 
  135           case device_type::gpu: result = device_id<device_type::gpu>{device}; 
break;
 
  139       data_{[
this, &other, mem_type, stream]() {
 
  140         auto result      = data_store{};
 
  141         auto result_data = 
static_cast<T*
>(
nullptr);
 
  142         if (mem_type == device_type::cpu) {
 
  143           auto buf    = owning_buffer<device_type::cpu, T>(other.
size());
 
  144           result_data = buf.get();
 
  145           result      = std::move(buf);
 
  146         } 
else if (mem_type == device_type::gpu) {
 
  147           auto buf = owning_buffer<device_type::gpu, T>(std::get<1>(device_), other.
size(), stream);
 
  148           result_data = buf.get();
 
  149           result      = std::move(buf);
 
  155       cached_ptr{[
this]() {
 
  156         auto result = 
static_cast<T*
>(
nullptr);
 
  157         switch (data_.index()) {
 
  158           case 0: result = std::get<0>(data_).get(); 
break;
 
  159           case 1: result = std::get<1>(data_).get(); 
break;
 
  160           case 2: result = std::get<2>(data_).get(); 
break;
 
  161           case 3: result = std::get<3>(data_).get(); 
break;
 
  184     swap(first.device_, second.device_);
 
  185     swap(first.data_, second.data_);
 
  186     swap(first.size_, second.size_);
 
  187     swap(first.cached_ptr, second.cached_ptr);
 
  201     : device_{[mem_type, &device]() {
 
  209       data_{[&other, mem_type, device, stream]() {
 
  210         auto result = data_store{};
 
  212           result = std::move(other.data_);
 
  214           auto* result_data = 
static_cast<T*
>(
nullptr);
 
  215           if (mem_type == device_type::cpu) {
 
  216             auto buf    = owning_buffer<device_type::cpu, T>{other.
size()};
 
  217             result_data = buf.get();
 
  218             result      = std::move(buf);
 
  219           } 
else if (mem_type == device_type::gpu) {
 
  220             auto buf    = owning_buffer<device_type::gpu, T>{device, other.
size(), stream};
 
  221             result_data = buf.get();
 
  222             result      = std::move(buf);
 
  229       cached_ptr{[
this]() {
 
  230         auto result = 
static_cast<T*
>(
nullptr);
 
  231         switch (data_.index()) {
 
  232           case 0: result = std::get<0>(data_).get(); 
break;
 
  233           case 1: result = std::get<1>(data_).get(); 
break;
 
  234           case 2: result = std::get<2>(data_).get(); 
break;
 
  235           case 3: result = std::get<3>(data_).get(); 
break;
 
  256     data_      = std::move(other.data_);
 
  257     device_    = std::move(other.device_);
 
  258     size_      = std::move(other.size_);
 
  259     cached_ptr = std::move(other.cached_ptr);
 
  265     typename = decltype(*std::declval<iter_t&>(), 
void(), ++std::declval<iter_t&>(), 
void())>
 
  266   buffer(iter_t 
const& begin, iter_t 
const& end)
 
  267     : 
buffer{static_cast<size_t>(std::distance(begin, end))}
 
  269     auto index = std::size_t{};
 
  270     std::for_each(begin, end, [&index, 
this](
auto&& val) { data()[index++] = val; });
 
  275     typename = decltype(*std::declval<iter_t&>(), 
void(), ++std::declval<iter_t&>(), 
void())>
 
  283     typename = decltype(*std::declval<iter_t&>(), 
void(), ++std::declval<iter_t&>(), 
void())>
 
  /** Returns the stored size_ member by value; const and noexcept. */
  293   auto size() const noexcept { 
return size_; }
 
  298     if (device_.index() == 0) {
 
  299       result = device_type::cpu;
 
  301       result = device_type::gpu;
 
  /**
   * Returns the stored device_ member; const and noexcept.
   * The plain `auto` return type means the caller receives a copy, not a reference.
   */
  306   auto device() const noexcept { 
return device_; }
 
  311     switch (device_.index()) {
 
  312       case 0: result = std::get<0>(device_).value(); 
break;
 
  313       case 1: result = std::get<1>(device_).value(); 
break;
 
  326 template <
bool bounds_check, 
typename T, 
typename U>
 
  334   if constexpr (bounds_check) {
 
  335     if (src.
size() - src_offset < size || dst.
size() - dst_offset < size) {
 
  336       throw out_of_bounds(
"Attempted copy to or from buffer of inadequate size");
 
  340        src.
data() + src_offset,
 
  347 template <
bool bounds_check, 
typename T, 
typename U>
 
  350   copy<bounds_check>(dst, src, 0, 0, src.
size(), stream);
 
  352 template <
bool bounds_check, 
typename T, 
typename U>
 
  358 template <
bool bounds_check, 
typename T, 
typename U>
 
  366   if constexpr (bounds_check) {
 
  367     if (src.size() - src_offset < size || dst.
size() - dst_offset < size) {
 
  368       throw out_of_bounds(
"Attempted copy to or from buffer of inadequate size");
 
  371   copy(dst.data() + dst_offset,
 
  372        src.
data() + src_offset,
 
  379 template <
bool bounds_check, 
typename T, 
typename U>
 
  385   copy<bounds_check>(dst, src, dst_offset, 0, src.
size(), stream);
 
  388 template <
bool bounds_check, 
typename T, 
typename U>
 
  391   copy<bounds_check>(dst, src, 0, 0, src.size(), stream);
 
  393 template <
bool bounds_check, 
typename T, 
typename U>
 
  396   copy<bounds_check>(dst, src, 0, 0, src.size(), 
cuda_stream{});
 
#define DEVICE
Definition: gpu_support.hpp:35
 
#define HOST
Definition: gpu_support.hpp:34
 
Definition: buffer.hpp:35
 
const_agnostic_same_t< T, U > copy(buffer< T > &&dst, buffer< U > &&src)
Definition: buffer.hpp:394
 
int cuda_stream
Definition: cuda_stream.hpp:25
 
std::enable_if_t< std::is_same_v< std::remove_const_t< T >, std::remove_const_t< U > >, V > const_agnostic_same_t
Definition: const_agnostic.hpp:22
 
device_type
Definition: device_type.hpp:18
 
std::variant< device_id< device_type::cpu >, device_id< device_type::gpu > > device_id_variant
Definition: device_id.hpp:31
 
A container which may or may not own its own data on host or device.
Definition: buffer.hpp:41
 
T value_type
Definition: buffer.hpp:43
 
buffer(iter_t const &begin, iter_t const &end, device_type mem_type, int device, cuda_stream stream=cuda_stream{})
Definition: buffer.hpp:284
 
buffer(buffer< T > const &other, device_type mem_type, int device=0, cuda_stream stream=cuda_stream{})
Construct one buffer from another in the given memory location (either on host or on device). A buffer...
Definition: buffer.hpp:127
 
buffer(iter_t const &begin, iter_t const &end)
Definition: buffer.hpp:266
 
std::variant< non_owning_buffer< device_type::cpu, T >, non_owning_buffer< device_type::gpu, T >, owning_buffer< device_type::cpu, T >, owning_buffer< device_type::gpu, T > > data_store
Definition: buffer.hpp:48
 
buffer(buffer< T > const &other, cuda_stream stream=cuda_stream{})
Create owning copy of existing buffer with given stream. The memory type of this new buffer will be th...
Definition: buffer.hpp:172
 
buffer()
Definition: buffer.hpp:50
 
auto size() const noexcept
Definition: buffer.hpp:293
 
buffer(buffer< T > &&other, device_type mem_type, int device)
Definition: buffer.hpp:241
 
friend void swap(buffer< T > &first, buffer< T > &second)
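Swap the device, data, size and cached pointer of two buffers. (The generated brief shown here, "Create owning copy of existing buffer. The memory type of this new buffer will be the same as the orig...", describes a copy constructor rather than swap.)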
Definition: buffer.hpp:181
 
buffer(index_type size, device_type mem_type=device_type::cpu, int device=0, cuda_stream stream=0)
Definition: buffer.hpp:53
 
buffer< T > & operator=(buffer< T > &&other) noexcept
Definition: buffer.hpp:254
 
HOST DEVICE auto * data() const noexcept
Definition: buffer.hpp:294
 
buffer(iter_t const &begin, iter_t const &end, device_type mem_type)
Definition: buffer.hpp:276
 
std::size_t index_type
Definition: buffer.hpp:42
 
auto memory_type() const noexcept
Definition: buffer.hpp:295
 
buffer(T *input_data, index_type size, device_type mem_type=device_type::cpu, int device=0)
Definition: buffer.hpp:90
 
buffer< T > & operator=(buffer< T > const &other)
Definition: buffer.hpp:189
 
buffer(buffer< T > &&other) noexcept
Definition: buffer.hpp:250
 
buffer(buffer< T > &&other, device_type mem_type)
Definition: buffer.hpp:245
 
auto device_index() const noexcept
Definition: buffer.hpp:308
 
auto device() const noexcept
Definition: buffer.hpp:306
 
buffer(buffer< T > &&other, device_type mem_type, int device, cuda_stream stream)
Move from existing buffer unless a copy is necessary based on memory location.
Definition: buffer.hpp:200
 
Definition: exceptions.hpp:29