buffer.hpp
/*
 * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

// Depends on project-local raft_proto headers (not reproduced in this listing) that
// provide: cuda_stream (an int alias), device_type, device_id and device_id_variant
// (std::variant<device_id<device_type::cpu>, device_id<device_type::gpu>>), the
// HOST/DEVICE markers from gpu_support.hpp, the out_of_bounds exception,
// const_agnostic_same_t (enable_if requiring T and U to match up to constness),
// non_owning_buffer, owning_buffer, and the raw-pointer copy() routine used below.

#include <stdint.h>

#include <algorithm>  // std::for_each (used by the iterator-range constructor)
#include <cstddef>
#include <iterator>
#include <memory>
#include <utility>
#include <variant>

namespace raft_proto {

/** A container which may or may not own its own data on host or device. */
template <typename T>
struct buffer {
  using index_type = std::size_t;
  using value_type = T;

  using data_store = std::variant<non_owning_buffer<device_type::cpu, T>,
                                  non_owning_buffer<device_type::gpu, T>,
                                  owning_buffer<device_type::cpu, T>,
                                  owning_buffer<device_type::gpu, T>>;

  buffer() : device_{}, data_{}, size_{}, cached_ptr{nullptr} {}

  /** Construct an owning buffer of the given size on host or device. */
  buffer(index_type size,
         device_type mem_type = device_type::cpu,
         int device = 0,
         cuda_stream stream = 0)
    : device_{[mem_type, &device]() {
        auto result = device_id_variant{};
        switch (mem_type) {
          case device_type::cpu: result = device_id<device_type::cpu>{device}; break;
          case device_type::gpu: result = device_id<device_type::gpu>{device}; break;
        }
        return result;
      }()},
      data_{[this, mem_type, size, stream]() {
        auto result = data_store{};
        switch (mem_type) {
          case device_type::cpu: result = owning_buffer<device_type::cpu, T>{size}; break;
          case device_type::gpu:
            result = owning_buffer<device_type::gpu, T>{std::get<1>(device_), size, stream};
            break;
        }
        return result;
      }()},
      size_{size},
      cached_ptr{[this]() {
        auto result = static_cast<T*>(nullptr);
        switch (data_.index()) {
          case 0: result = std::get<0>(data_).get(); break;
          case 1: result = std::get<1>(data_).get(); break;
          case 2: result = std::get<2>(data_).get(); break;
          case 3: result = std::get<3>(data_).get(); break;
        }
        return result;
      }()}
  {
  }

77 
79  buffer(T* input_data, index_type size, device_type mem_type = device_type::cpu, int device = 0)
80  : device_{[mem_type, &device]() {
81  auto result = device_id_variant{};
82  switch (mem_type) {
83  case device_type::cpu: result = device_id<device_type::cpu>{device}; break;
84  case device_type::gpu: result = device_id<device_type::gpu>{device}; break;
85  }
86  return result;
87  }()},
88  data_{[input_data, mem_type]() {
89  auto result = data_store{};
90  switch (mem_type) {
91  case device_type::cpu: result = non_owning_buffer<device_type::cpu, T>{input_data}; break;
92  case device_type::gpu: result = non_owning_buffer<device_type::gpu, T>{input_data}; break;
93  }
94  return result;
95  }()},
96  size_{size},
97  cached_ptr{[this]() {
98  auto result = static_cast<T*>(nullptr);
99  switch (data_.index()) {
100  case 0: result = std::get<0>(data_).get(); break;
101  case 1: result = std::get<1>(data_).get(); break;
102  case 2: result = std::get<2>(data_).get(); break;
103  case 3: result = std::get<3>(data_).get(); break;
104  }
105  return result;
106  }()}
107  {
108  }
109 
  /**
   * Construct one buffer from another in the given memory location (either on
   * host or on device). The new buffer owns a copy of the data.
   */
  buffer(buffer<T> const& other,
         device_type mem_type,
         int device = 0,
         cuda_stream stream = cuda_stream{})
    : device_{[mem_type, &device]() {
        auto result = device_id_variant{};
        switch (mem_type) {
          case device_type::cpu: result = device_id<device_type::cpu>{device}; break;
          case device_type::gpu: result = device_id<device_type::gpu>{device}; break;
        }
        return result;
      }()},
      data_{[this, &other, mem_type, stream]() {
        auto result = data_store{};
        auto result_data = static_cast<T*>(nullptr);
        if (mem_type == device_type::cpu) {
          auto buf = owning_buffer<device_type::cpu, T>(other.size());
          result_data = buf.get();
          result = std::move(buf);
        } else if (mem_type == device_type::gpu) {
          auto buf = owning_buffer<device_type::gpu, T>(std::get<1>(device_), other.size(), stream);
          result_data = buf.get();
          result = std::move(buf);
        }
        copy(result_data, other.data(), other.size(), mem_type, other.memory_type(), stream);
        return result;
      }()},
      size_{other.size()},
      cached_ptr{[this]() {
        auto result = static_cast<T*>(nullptr);
        switch (data_.index()) {
          case 0: result = std::get<0>(data_).get(); break;
          case 1: result = std::get<1>(data_).get(); break;
          case 2: result = std::get<2>(data_).get(); break;
          case 3: result = std::get<3>(data_).get(); break;
        }
        return result;
      }()}
  {
  }

  /**
   * Create an owning copy of an existing buffer with the given stream.
   * The memory type of the new buffer is the same as the original's.
   */
  buffer(buffer<T> const& other, cuda_stream stream = cuda_stream{})
    : buffer(other, other.memory_type(), other.device_index(), stream)
  {
  }

  /** Swap the contents of two buffers. */
  friend void swap(buffer<T>& first, buffer<T>& second)
  {
    using std::swap;
    swap(first.device_, second.device_);
    swap(first.data_, second.data_);
    swap(first.size_, second.size_);
    swap(first.cached_ptr, second.cached_ptr);
  }

  /** Copy assignment via the copy-and-swap idiom. */
  buffer<T>& operator=(buffer<T> const& other)
  {
    auto copy = other;
    swap(*this, copy);
    return *this;
  }

  /** Move from an existing buffer unless a copy is necessary based on memory location. */
  buffer(buffer<T>&& other, device_type mem_type, int device, cuda_stream stream)
    : device_{[mem_type, &device]() {
        auto result = device_id_variant{};
        switch (mem_type) {
          case device_type::cpu: result = device_id<device_type::cpu>{device}; break;
          case device_type::gpu: result = device_id<device_type::gpu>{device}; break;
        }
        return result;
      }()},
      data_{[&other, mem_type, device, stream]() {
        auto result = data_store{};
        if (mem_type == other.memory_type() && device == other.device_index()) {
          result = std::move(other.data_);
        } else {
          auto* result_data = static_cast<T*>(nullptr);
          if (mem_type == device_type::cpu) {
            auto buf = owning_buffer<device_type::cpu, T>{other.size()};
            result_data = buf.get();
            result = std::move(buf);
          } else if (mem_type == device_type::gpu) {
            auto buf = owning_buffer<device_type::gpu, T>{device, other.size(), stream};
            result_data = buf.get();
            result = std::move(buf);
          }
          copy(result_data, other.data(), other.size(), mem_type, other.memory_type(), stream);
        }
        return result;
      }()},
      size_{other.size()},
      cached_ptr{[this]() {
        auto result = static_cast<T*>(nullptr);
        switch (data_.index()) {
          case 0: result = std::get<0>(data_).get(); break;
          case 1: result = std::get<1>(data_).get(); break;
          case 2: result = std::get<2>(data_).get(); break;
          case 3: result = std::get<3>(data_).get(); break;
        }
        return result;
      }()}
  {
  }
  buffer(buffer<T>&& other, device_type mem_type, int device)
    : buffer{std::move(other), mem_type, device, cuda_stream{}}
  {
  }
  buffer(buffer<T>&& other, device_type mem_type)
    : buffer{std::move(other), mem_type, 0, cuda_stream{}}
  {
  }

  buffer(buffer<T>&& other) noexcept
    : buffer{std::move(other), other.memory_type(), other.device_index(), cuda_stream{}}
  {
  }
  buffer<T>& operator=(buffer<T>&& other) noexcept
  {
    data_ = std::move(other.data_);
    device_ = std::move(other.device_);
    size_ = std::move(other.size_);
    cached_ptr = std::move(other.cached_ptr);
    return *this;
  }

  /** Construct an owning host buffer from an iterator range. */
  template <
    typename iter_t,
    typename = decltype(*std::declval<iter_t&>(), void(), ++std::declval<iter_t&>(), void())>
  buffer(iter_t const& begin, iter_t const& end)
    : buffer{static_cast<size_t>(std::distance(begin, end))}
  {
    auto index = std::size_t{};
    std::for_each(begin, end, [&index, this](auto&& val) { data()[index++] = val; });
  }

  template <
    typename iter_t,
    typename = decltype(*std::declval<iter_t&>(), void(), ++std::declval<iter_t&>(), void())>
  buffer(iter_t const& begin, iter_t const& end, device_type mem_type)
    : buffer{buffer{begin, end}, mem_type}
  {
  }

  template <
    typename iter_t,
    typename = decltype(*std::declval<iter_t&>(), void(), ++std::declval<iter_t&>(), void())>
  buffer(iter_t const& begin,
         iter_t const& end,
         device_type mem_type,
         int device,
         cuda_stream stream = cuda_stream{})
    : buffer{buffer{begin, end}, mem_type, device, stream}
  {
  }

  auto size() const noexcept { return size_; }
  HOST DEVICE auto* data() const noexcept { return cached_ptr; }
  auto memory_type() const noexcept
  {
    auto result = device_type{};
    if (device_.index() == 0) {
      result = device_type::cpu;
    } else {
      result = device_type::gpu;
    }
    return result;
  }

  auto device() const noexcept { return device_; }

  auto device_index() const noexcept
  {
    auto result = int{};
    switch (device_.index()) {
      case 0: result = std::get<0>(device_).value(); break;
      case 1: result = std::get<1>(device_).value(); break;
    }
    return result;
  }
  ~buffer() = default;

 private:
  device_id_variant device_;
  data_store data_;
  index_type size_;
  T* cached_ptr;
};

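// Usage sketch (not part of the original header): a minimal illustration of the
// construction paths above, assuming a float payload, device 0, and a caller inside
// (or using) namespace raft_proto. Names such as owned_host and gpu_copy are hypothetical.
//
//   auto owned_host = buffer<float>(16);                                 // owning, host memory
//   float host_values[16] = {};
//   auto borrowed = buffer<float>(host_values, 16);                      // non-owning view of host_values
//   auto gpu_copy = buffer<float>(owned_host, device_type::gpu);         // owning copy on device 0
//   auto back_on_host = buffer<float>(std::move(gpu_copy), device_type::cpu);  // copies back to host
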
template <bool bounds_check, typename T, typename U>
const_agnostic_same_t<T, U> copy(buffer<T>& dst,
                                 buffer<U> const& src,
                                 typename buffer<T>::index_type dst_offset,
                                 typename buffer<U>::index_type src_offset,
                                 typename buffer<T>::index_type size,
                                 cuda_stream stream)
{
  if constexpr (bounds_check) {
    if (src.size() - src_offset < size || dst.size() - dst_offset < size) {
      throw out_of_bounds("Attempted copy to or from buffer of inadequate size");
    }
  }
  copy(dst.data() + dst_offset,
       src.data() + src_offset,
       size,
       dst.memory_type(),
       src.memory_type(),
       stream);
}

template <bool bounds_check, typename T, typename U>
const_agnostic_same_t<T, U> copy(buffer<T>& dst, buffer<U> const& src, cuda_stream stream)
{
  copy<bounds_check>(dst, src, 0, 0, src.size(), stream);
}
template <bool bounds_check, typename T, typename U>
const_agnostic_same_t<T, U> copy(buffer<T>& dst, buffer<U> const& src)
{
  copy<bounds_check>(dst, src, 0, 0, src.size(), cuda_stream{});
}

template <bool bounds_check, typename T, typename U>
const_agnostic_same_t<T, U> copy(buffer<T>&& dst,
                                 buffer<U>&& src,
                                 typename buffer<T>::index_type dst_offset,
                                 typename buffer<U>::index_type src_offset,
                                 typename buffer<T>::index_type size,
                                 cuda_stream stream)
{
  if constexpr (bounds_check) {
    if (src.size() - src_offset < size || dst.size() - dst_offset < size) {
      throw out_of_bounds("Attempted copy to or from buffer of inadequate size");
    }
  }
  copy(dst.data() + dst_offset,
       src.data() + src_offset,
       size,
       dst.memory_type(),
       src.memory_type(),
       stream);
}

template <bool bounds_check, typename T, typename U>
const_agnostic_same_t<T, U> copy(buffer<T>&& dst,
                                 buffer<U>&& src,
                                 typename buffer<T>::index_type dst_offset,
                                 cuda_stream stream)
{
  copy<bounds_check>(dst, src, dst_offset, 0, src.size(), stream);
}

template <bool bounds_check, typename T, typename U>
const_agnostic_same_t<T, U> copy(buffer<T>&& dst, buffer<U>&& src, cuda_stream stream)
{
  copy<bounds_check>(dst, src, 0, 0, src.size(), stream);
}
template <bool bounds_check, typename T, typename U>
const_agnostic_same_t<T, U> copy(buffer<T>&& dst, buffer<U>&& src)
{
  copy<bounds_check>(dst, src, 0, 0, src.size(), cuda_stream{});
}

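// Usage sketch (not part of the original header): copying between buffers with the
// overloads above. The bounds_check template parameter selects whether out_of_bounds
// is thrown when a buffer is too small; the names values, src, and dst are hypothetical.
//
//   int values[16] = {};
//   auto src = buffer<int>(values, 16);                // non-owning host view
//   auto dst = buffer<int>(16, device_type::gpu);      // owning device buffer on device 0
//   copy<true>(dst, src);                              // checked full copy, host -> device
//   copy<false>(dst, src, 4, 0, 8, cuda_stream{});     // unchecked copy of 8 elements at dst offset 4
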
}  // namespace raft_proto