34 using data_store = std::variant<non_owning_buffer<device_type::cpu, T>,
// Default constructor. Value-initializes device_, data_ and size_, and sets
// cached_ptr to nullptr. No allocation is requested here; an empty-braced
// data_ leaves the data_store variant value-initialized.
// NOTE(review): presumably this represents an "empty" buffer — confirm that
// callers never dereference data() on a default-constructed instance.
39 buffer() : device_{}, data_{}, size_{}, cached_ptr{nullptr} {}
46 : device_{[mem_type, &
device]() {
54 data_{[
this, mem_type, size, stream]() {
55 auto result = data_store{};
57 case device_type::cpu: result = owning_buffer<device_type::cpu, T>{size};
break;
59 result = owning_buffer<device_type::gpu, T>{std::get<1>(device_), size, stream};
66 auto result =
static_cast<T*
>(
nullptr);
67 switch (data_.index()) {
68 case 0: result = std::get<0>(data_).get();
break;
69 case 1: result = std::get<1>(data_).get();
break;
70 case 2: result = std::get<2>(data_).get();
break;
71 case 3: result = std::get<3>(data_).get();
break;
80 : device_{[mem_type, &device]() {
88 data_{[input_data, mem_type]() {
89 auto result = data_store{};
91 case device_type::cpu: result = non_owning_buffer<device_type::cpu, T>{input_data};
break;
92 case device_type::gpu: result = non_owning_buffer<device_type::gpu, T>{input_data};
break;
98 auto result =
static_cast<T*
>(
nullptr);
99 switch (data_.index()) {
100 case 0: result = std::get<0>(data_).get();
break;
101 case 1: result = std::get<1>(data_).get();
break;
102 case 2: result = std::get<2>(data_).get();
break;
103 case 3: result = std::get<3>(data_).get();
break;
120 : device_{[mem_type, &device]() {
123 case device_type::cpu: result = device_id<device_type::cpu>{device};
break;
124 case device_type::gpu: result = device_id<device_type::gpu>{device};
break;
128 data_{[
this, &other, mem_type, stream]() {
129 auto result = data_store{};
130 auto result_data =
static_cast<T*
>(
nullptr);
131 if (mem_type == device_type::cpu) {
132 auto buf = owning_buffer<device_type::cpu, T>(other.
size());
133 result_data = buf.get();
134 result = std::move(buf);
135 }
else if (mem_type == device_type::gpu) {
136 auto buf = owning_buffer<device_type::gpu, T>(std::get<1>(device_), other.
size(), stream);
137 result_data = buf.get();
138 result = std::move(buf);
144 cached_ptr{[
this]() {
145 auto result =
static_cast<T*
>(
nullptr);
146 switch (data_.index()) {
147 case 0: result = std::get<0>(data_).get();
break;
148 case 1: result = std::get<1>(data_).get();
break;
149 case 2: result = std::get<2>(data_).get();
break;
150 case 3: result = std::get<3>(data_).get();
break;
173 swap(first.device_, second.device_);
174 swap(first.data_, second.data_);
175 swap(first.size_, second.size_);
176 swap(first.cached_ptr, second.cached_ptr);
190 : device_{[mem_type, &device]() {
198 data_{[&other, mem_type, device, stream]() {
199 auto result = data_store{};
201 result = std::move(other.data_);
203 auto* result_data =
static_cast<T*
>(
nullptr);
204 if (mem_type == device_type::cpu) {
205 auto buf = owning_buffer<device_type::cpu, T>{other.
size()};
206 result_data = buf.get();
207 result = std::move(buf);
208 }
else if (mem_type == device_type::gpu) {
209 auto buf = owning_buffer<device_type::gpu, T>{device, other.
size(), stream};
210 result_data = buf.get();
211 result = std::move(buf);
218 cached_ptr{[
this]() {
219 auto result =
static_cast<T*
>(
nullptr);
220 switch (data_.index()) {
221 case 0: result = std::get<0>(data_).get();
break;
222 case 1: result = std::get<1>(data_).get();
break;
223 case 2: result = std::get<2>(data_).get();
break;
224 case 3: result = std::get<3>(data_).get();
break;
245 data_ = std::move(other.data_);
246 device_ = std::move(other.device_);
247 size_ = std::move(other.size_);
248 cached_ptr = std::move(other.cached_ptr);
254 typename = decltype(*std::declval<iter_t&>(),
void(), ++std::declval<iter_t&>(),
void())>
255 buffer(iter_t
const& begin, iter_t
const& end)
256 :
buffer{static_cast<size_t>(std::distance(begin, end))}
258 auto index = std::size_t{};
259 std::for_each(begin, end, [&index,
this](
auto&& val) { data()[index++] = val; });
264 typename = decltype(*std::declval<iter_t&>(),
void(), ++std::declval<iter_t&>(),
void())>
272 typename = decltype(*std::declval<iter_t&>(),
void(), ++std::declval<iter_t&>(),
void())>
// Returns the stored size_ member by value (return type deduced from size_).
// Declared const noexcept: does not modify the buffer and does not throw.
// NOTE(review): size_ looks like an element count (the iterator constructor
// passes std::distance(begin, end)) rather than a byte count — confirm.
282 auto size() const noexcept {
return size_; }
287 if (device_.index() == 0) {
288 result = device_type::cpu;
290 result = device_type::gpu;
// Returns a copy of device_, the variant identifying which device this buffer
// is associated with (other members inspect it via device_.index() and
// std::get<N>(device_).value()). Because the return type is plain `auto`,
// the caller receives a copy, not a reference to the member.
295 auto device() const noexcept {
return device_; }
300 switch (device_.index()) {
301 case 0: result = std::get<0>(device_).value();
break;
302 case 1: result = std::get<1>(device_).value();
break;
315 template <
bool bounds_check,
typename T,
typename U>
323 if constexpr (bounds_check) {
324 if (src.
size() - src_offset < size || dst.
size() - dst_offset < size) {
325 throw out_of_bounds(
"Attempted copy to or from buffer of inadequate size");
329 src.
data() + src_offset,
336 template <
bool bounds_check,
typename T,
typename U>
339 copy<bounds_check>(dst, src, 0, 0, src.
size(), stream);
341 template <
bool bounds_check,
typename T,
typename U>
347 template <
bool bounds_check,
typename T,
typename U>
355 if constexpr (bounds_check) {
356 if (src.size() - src_offset < size || dst.
size() - dst_offset < size) {
357 throw out_of_bounds(
"Attempted copy to or from buffer of inadequate size");
360 copy(dst.data() + dst_offset,
361 src.
data() + src_offset,
368 template <
bool bounds_check,
typename T,
typename U>
374 copy<bounds_check>(dst, src, dst_offset, 0, src.
size(), stream);
377 template <
bool bounds_check,
typename T,
typename U>
380 copy<bounds_check>(dst, src, 0, 0, src.size(), stream);
382 template <
bool bounds_check,
typename T,
typename U>
385 copy<bounds_check>(dst, src, 0, 0, src.size(),
cuda_stream{});
#define DEVICE
Definition: gpu_support.hpp:24
#define HOST
Definition: gpu_support.hpp:23
Definition: buffer.hpp:24
const_agnostic_same_t< T, U > copy(buffer< T > &&dst, buffer< U > &&src)
Definition: buffer.hpp:383
int cuda_stream
Definition: cuda_stream.hpp:14
std::enable_if_t< std::is_same_v< std::remove_const_t< T >, std::remove_const_t< U > >, V > const_agnostic_same_t
Definition: const_agnostic.hpp:11
device_type
Definition: device_type.hpp:7
std::variant< device_id< device_type::cpu >, device_id< device_type::gpu > > device_id_variant
Definition: device_id.hpp:20
A container which may or may not own its own data on host or device.
Definition: buffer.hpp:30
T value_type
Definition: buffer.hpp:32
buffer(iter_t const &begin, iter_t const &end, device_type mem_type, int device, cuda_stream stream=cuda_stream{})
Definition: buffer.hpp:273
buffer(buffer< T > const &other, device_type mem_type, int device=0, cuda_stream stream=cuda_stream{})
Construct one buffer from another in the given memory location (either on host or on device) A buffer...
Definition: buffer.hpp:116
buffer(iter_t const &begin, iter_t const &end)
Definition: buffer.hpp:255
std::variant< non_owning_buffer< device_type::cpu, T >, non_owning_buffer< device_type::gpu, T >, owning_buffer< device_type::cpu, T >, owning_buffer< device_type::gpu, T > > data_store
Definition: buffer.hpp:37
buffer(buffer< T > const &other, cuda_stream stream=cuda_stream{})
Create owning copy of existing buffer with given stream The memory type of this new buffer will be th...
Definition: buffer.hpp:161
buffer()
Definition: buffer.hpp:39
auto size() const noexcept
Definition: buffer.hpp:282
buffer(buffer< T > &&other, device_type mem_type, int device)
Definition: buffer.hpp:230
friend void swap(buffer< T > &first, buffer< T > &second)
Swap the contents of two buffers by exchanging their device_, data_, size_ and cached_ptr members.
Definition: buffer.hpp:170
buffer(index_type size, device_type mem_type=device_type::cpu, int device=0, cuda_stream stream=0)
Definition: buffer.hpp:42
buffer< T > & operator=(buffer< T > &&other) noexcept
Definition: buffer.hpp:243
HOST DEVICE auto * data() const noexcept
Definition: buffer.hpp:283
buffer(iter_t const &begin, iter_t const &end, device_type mem_type)
Definition: buffer.hpp:265
std::size_t index_type
Definition: buffer.hpp:31
auto memory_type() const noexcept
Definition: buffer.hpp:284
buffer(T *input_data, index_type size, device_type mem_type=device_type::cpu, int device=0)
Definition: buffer.hpp:79
buffer< T > & operator=(buffer< T > const &other)
Definition: buffer.hpp:178
buffer(buffer< T > &&other) noexcept
Definition: buffer.hpp:239
buffer(buffer< T > &&other, device_type mem_type)
Definition: buffer.hpp:234
auto device_index() const noexcept
Definition: buffer.hpp:297
auto device() const noexcept
Definition: buffer.hpp:295
buffer(buffer< T > &&other, device_type mem_type, int device, cuda_stream stream)
Move from existing buffer unless a copy is necessary based on memory location.
Definition: buffer.hpp:189
Definition: exceptions.hpp:18