45 using data_store = std::variant<non_owning_buffer<device_type::cpu, T>,
50 buffer() : device_{}, data_{}, size_{}, cached_ptr{nullptr} {}
57 : device_{[mem_type, &
device]() {
65 data_{[
this, mem_type, size, stream]() {
66 auto result = data_store{};
68 case device_type::cpu: result = owning_buffer<device_type::cpu, T>{size};
break;
70 result = owning_buffer<device_type::gpu, T>{std::get<1>(device_), size, stream};
77 auto result =
static_cast<T*
>(
nullptr);
78 switch (data_.index()) {
79 case 0: result = std::get<0>(data_).get();
break;
80 case 1: result = std::get<1>(data_).get();
break;
81 case 2: result = std::get<2>(data_).get();
break;
82 case 3: result = std::get<3>(data_).get();
break;
91 : device_{[mem_type, &device]() {
99 data_{[input_data, mem_type]() {
100 auto result = data_store{};
102 case device_type::cpu: result = non_owning_buffer<device_type::cpu, T>{input_data};
break;
103 case device_type::gpu: result = non_owning_buffer<device_type::gpu, T>{input_data};
break;
108 cached_ptr{[
this]() {
109 auto result =
static_cast<T*
>(
nullptr);
110 switch (data_.index()) {
111 case 0: result = std::get<0>(data_).get();
break;
112 case 1: result = std::get<1>(data_).get();
break;
113 case 2: result = std::get<2>(data_).get();
break;
114 case 3: result = std::get<3>(data_).get();
break;
131 : device_{[mem_type, &device]() {
134 case device_type::cpu: result = device_id<device_type::cpu>{device};
break;
135 case device_type::gpu: result = device_id<device_type::gpu>{device};
break;
139 data_{[
this, &other, mem_type, stream]() {
140 auto result = data_store{};
141 auto result_data =
static_cast<T*
>(
nullptr);
142 if (mem_type == device_type::cpu) {
143 auto buf = owning_buffer<device_type::cpu, T>(other.
size());
144 result_data = buf.get();
145 result = std::move(buf);
146 }
else if (mem_type == device_type::gpu) {
147 auto buf = owning_buffer<device_type::gpu, T>(std::get<1>(device_), other.
size(), stream);
148 result_data = buf.get();
149 result = std::move(buf);
155 cached_ptr{[
this]() {
156 auto result =
static_cast<T*
>(
nullptr);
157 switch (data_.index()) {
158 case 0: result = std::get<0>(data_).get();
break;
159 case 1: result = std::get<1>(data_).get();
break;
160 case 2: result = std::get<2>(data_).get();
break;
161 case 3: result = std::get<3>(data_).get();
break;
184 swap(first.device_, second.device_);
185 swap(first.data_, second.data_);
186 swap(first.size_, second.size_);
187 swap(first.cached_ptr, second.cached_ptr);
201 : device_{[mem_type, &device]() {
209 data_{[&other, mem_type, device, stream]() {
210 auto result = data_store{};
212 result = std::move(other.data_);
214 auto* result_data =
static_cast<T*
>(
nullptr);
215 if (mem_type == device_type::cpu) {
216 auto buf = owning_buffer<device_type::cpu, T>{other.
size()};
217 result_data = buf.get();
218 result = std::move(buf);
219 }
else if (mem_type == device_type::gpu) {
220 auto buf = owning_buffer<device_type::gpu, T>{device, other.
size(), stream};
221 result_data = buf.get();
222 result = std::move(buf);
229 cached_ptr{[
this]() {
230 auto result =
static_cast<T*
>(
nullptr);
231 switch (data_.index()) {
232 case 0: result = std::get<0>(data_).get();
break;
233 case 1: result = std::get<1>(data_).get();
break;
234 case 2: result = std::get<2>(data_).get();
break;
235 case 3: result = std::get<3>(data_).get();
break;
256 data_ = std::move(other.data_);
257 device_ = std::move(other.device_);
258 size_ = std::move(other.size_);
259 cached_ptr = std::move(other.cached_ptr);
265 typename = decltype(*std::declval<iter_t&>(),
void(), ++std::declval<iter_t&>(),
void())>
266 buffer(iter_t
const& begin, iter_t
const& end)
267 :
buffer{static_cast<size_t>(std::distance(begin, end))}
269 auto index = std::size_t{};
270 std::for_each(begin, end, [&index,
this](
auto&& val) { data()[index++] = val; });
275 typename = decltype(*std::declval<iter_t&>(),
void(), ++std::declval<iter_t&>(),
void())>
283 typename = decltype(*std::declval<iter_t&>(),
void(), ++std::declval<iter_t&>(),
void())>
293 auto size() const noexcept {
return size_; }
298 if (device_.index() == 0) {
299 result = device_type::cpu;
301 result = device_type::gpu;
306 auto device() const noexcept {
return device_; }
311 switch (device_.index()) {
312 case 0: result = std::get<0>(device_).value();
break;
313 case 1: result = std::get<1>(device_).value();
break;
326 template <
bool bounds_check,
typename T,
typename U>
334 if constexpr (bounds_check) {
335 if (src.
size() - src_offset < size || dst.
size() - dst_offset < size) {
336 throw out_of_bounds(
"Attempted copy to or from buffer of inadequate size");
340 src.
data() + src_offset,
347 template <
bool bounds_check,
typename T,
typename U>
350 copy<bounds_check>(dst, src, 0, 0, src.
size(), stream);
352 template <
bool bounds_check,
typename T,
typename U>
358 template <
bool bounds_check,
typename T,
typename U>
366 if constexpr (bounds_check) {
367 if (src.size() - src_offset < size || dst.
size() - dst_offset < size) {
368 throw out_of_bounds(
"Attempted copy to or from buffer of inadequate size");
371 copy(dst.data() + dst_offset,
372 src.
data() + src_offset,
379 template <
bool bounds_check,
typename T,
typename U>
385 copy<bounds_check>(dst, src, dst_offset, 0, src.
size(), stream);
388 template <
bool bounds_check,
typename T,
typename U>
391 copy<bounds_check>(dst, src, 0, 0, src.size(), stream);
393 template <
bool bounds_check,
typename T,
typename U>
396 copy<bounds_check>(dst, src, 0, 0, src.size(),
cuda_stream{});
#define DEVICE
Definition: gpu_support.hpp:35
#define HOST
Definition: gpu_support.hpp:34
Definition: buffer.hpp:35
const_agnostic_same_t< T, U > copy(buffer< T > &&dst, buffer< U > &&src)
Definition: buffer.hpp:394
int cuda_stream
Definition: cuda_stream.hpp:25
std::enable_if_t< std::is_same_v< std::remove_const_t< T >, std::remove_const_t< U > >, V > const_agnostic_same_t
Definition: const_agnostic.hpp:22
device_type
Definition: device_type.hpp:18
std::variant< device_id< device_type::cpu >, device_id< device_type::gpu > > device_id_variant
Definition: device_id.hpp:31
A container which may or may not own its own data on host or device.
Definition: buffer.hpp:41
T value_type
Definition: buffer.hpp:43
buffer(iter_t const &begin, iter_t const &end, device_type mem_type, int device, cuda_stream stream=cuda_stream{})
Definition: buffer.hpp:284
buffer(buffer< T > const &other, device_type mem_type, int device=0, cuda_stream stream=cuda_stream{})
Construct one buffer from another in the given memory location (either on host or on device). A buffer...
Definition: buffer.hpp:127
buffer(iter_t const &begin, iter_t const &end)
Definition: buffer.hpp:266
std::variant< non_owning_buffer< device_type::cpu, T >, non_owning_buffer< device_type::gpu, T >, owning_buffer< device_type::cpu, T >, owning_buffer< device_type::gpu, T > > data_store
Definition: buffer.hpp:48
buffer(buffer< T > const &other, cuda_stream stream=cuda_stream{})
Create owning copy of existing buffer with given stream. The memory type of this new buffer will be the same as the original.
Definition: buffer.hpp:172
buffer()
Definition: buffer.hpp:50
auto size() const noexcept
Definition: buffer.hpp:293
buffer(buffer< T > &&other, device_type mem_type, int device)
Definition: buffer.hpp:241
friend void swap(buffer< T > &first, buffer< T > &second)
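Swap the device, data, size and cached pointer of two buffers. (The description generated for this entry, "Create owning copy of existing buffer. The memory type of this new buffer will be the same as the orig...", describes copy construction rather than swapping and appears to be a misplaced doc comment.)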
Definition: buffer.hpp:181
buffer(index_type size, device_type mem_type=device_type::cpu, int device=0, cuda_stream stream=0)
Definition: buffer.hpp:53
buffer< T > & operator=(buffer< T > &&other) noexcept
Definition: buffer.hpp:254
HOST DEVICE auto * data() const noexcept
Definition: buffer.hpp:294
buffer(iter_t const &begin, iter_t const &end, device_type mem_type)
Definition: buffer.hpp:276
std::size_t index_type
Definition: buffer.hpp:42
auto memory_type() const noexcept
Definition: buffer.hpp:295
buffer(T *input_data, index_type size, device_type mem_type=device_type::cpu, int device=0)
Definition: buffer.hpp:90
buffer< T > & operator=(buffer< T > const &other)
Definition: buffer.hpp:189
buffer(buffer< T > &&other) noexcept
Definition: buffer.hpp:250
buffer(buffer< T > &&other, device_type mem_type)
Definition: buffer.hpp:245
auto device_index() const noexcept
Definition: buffer.hpp:308
auto device() const noexcept
Definition: buffer.hpp:306
buffer(buffer< T > &&other, device_type mem_type, int device, cuda_stream stream)
Move from existing buffer unless a copy is necessary based on memory location.
Definition: buffer.hpp:200
Definition: exceptions.hpp:29