All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
device_buffer.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <rmm/cuda_device.hpp>
19 #include <rmm/cuda_stream_view.hpp>
20 #include <rmm/detail/error.hpp>
21 #include <rmm/detail/export.hpp>
23 #include <rmm/resource_ref.hpp>
24 
25 #include <cuda/memory_resource>
26 #include <cuda_runtime_api.h>
27 
28 #include <cassert>
29 #include <cstddef>
30 #include <utility>
31 
32 namespace RMM_NAMESPACE {
84  public:
85  // The copy constructor and copy assignment operator without a stream are deleted because they
86  // provide no way to specify an explicit stream
87  device_buffer(device_buffer const& other) = delete;
88  device_buffer& operator=(device_buffer const& other) = delete;
89 
93  // Note: we cannot use `device_buffer() = default;` because nvcc implicitly adds
94  // `__host__ __device__` specifiers to the defaulted constructor when it is called within the
95  // context of both host and device functions.
97 
108  explicit device_buffer(std::size_t size,
109  cuda_stream_view stream,
111  : _stream{stream}, _mr{mr}
112  {
113  cuda_set_device_raii dev{_device};
114  allocate_async(size);
115  }
116 
136  device_buffer(void const* source_data,
137  std::size_t size,
138  cuda_stream_view stream,
140  : _stream{stream}, _mr{mr}
141  {
142  cuda_set_device_raii dev{_device};
143  allocate_async(size);
144  copy_async(source_data, size);
145  }
146 
169  cuda_stream_view stream,
171  : device_buffer{other.data(), other.size(), stream, mr}
172  {
173  }
174 
186  device_buffer(device_buffer&& other) noexcept
187  : _data{other._data},
188  _size{other._size},
189  _capacity{other._capacity},
190  _stream{other.stream()},
191  _mr{other._mr},
192  _device{other._device}
193  {
194  other._data = nullptr;
195  other._size = 0;
196  other._capacity = 0;
197  other.set_stream(cuda_stream_view{});
198  other._device = cuda_device_id{-1};
199  }
200 
216  {
217  if (&other != this) {
218  cuda_set_device_raii dev{_device};
219  deallocate_async();
220 
221  _data = other._data;
222  _size = other._size;
223  _capacity = other._capacity;
224  set_stream(other.stream());
225  _mr = other._mr;
226  _device = other._device;
227 
228  other._data = nullptr;
229  other._size = 0;
230  other._capacity = 0;
231  other.set_stream(cuda_stream_view{});
232  other._device = cuda_device_id{-1};
233  }
234  return *this;
235  }
236 
244  ~device_buffer() noexcept
245  {
246  cuda_set_device_raii dev{_device};
247  deallocate_async();
248  _stream = cuda_stream_view{};
249  }
250 
269  void reserve(std::size_t new_capacity, cuda_stream_view stream)
270  {
271  set_stream(stream);
272  if (new_capacity > capacity()) {
273  cuda_set_device_raii dev{_device};
274  auto tmp = device_buffer{new_capacity, stream, _mr};
275  auto const old_size = size();
276  RMM_CUDA_TRY(cudaMemcpyAsync(tmp.data(), data(), size(), cudaMemcpyDefault, stream.value()));
277  *this = std::move(tmp);
278  _size = old_size;
279  }
280  }
281 
307  void resize(std::size_t new_size, cuda_stream_view stream)
308  {
309  set_stream(stream);
310  // If the requested size is smaller than the current capacity, just update
311  // the size without any allocations
312  if (new_size <= capacity()) {
313  _size = new_size;
314  } else {
315  cuda_set_device_raii dev{_device};
316  auto tmp = device_buffer{new_size, stream, _mr};
317  RMM_CUDA_TRY(cudaMemcpyAsync(tmp.data(), data(), size(), cudaMemcpyDefault, stream.value()));
318  *this = std::move(tmp);
319  }
320  }
321 
336  {
337  set_stream(stream);
338  if (size() != capacity()) {
339  cuda_set_device_raii dev{_device};
340  // Invoke copy ctor on self which only copies `[0, size())` and swap it
341  // with self. The temporary `device_buffer` will hold the old contents
342  // which will then be destroyed
343  auto tmp = device_buffer{*this, stream, _mr};
344  std::swap(tmp, *this);
345  }
346  }
347 
351  [[nodiscard]] void const* data() const noexcept { return _data; }
352 
356  void* data() noexcept { return _data; }
357 
361  [[nodiscard]] std::size_t size() const noexcept { return _size; }
362 
366  [[nodiscard]] std::int64_t ssize() const noexcept
367  {
368  assert(size() < static_cast<std::size_t>(std::numeric_limits<int64_t>::max()) &&
369  "Size overflows signed integer");
370  return static_cast<int64_t>(size());
371  }
372 
379  [[nodiscard]] bool is_empty() const noexcept { return 0 == size(); }
380 
388  [[nodiscard]] std::size_t capacity() const noexcept { return _capacity; }
389 
393  [[nodiscard]] cuda_stream_view stream() const noexcept { return _stream; }
394 
406  void set_stream(cuda_stream_view stream) noexcept { _stream = stream; }
407 
411  [[nodiscard]] rmm::device_async_resource_ref memory_resource() const noexcept { return _mr; }
412 
413  private:
414  void* _data{nullptr};
415  std::size_t _size{};
416  std::size_t _capacity{};
417  cuda_stream_view _stream{};
418 
422  cuda_device_id _device{get_current_cuda_device()};
423 
433  void allocate_async(std::size_t bytes)
434  {
435  _size = bytes;
436  _capacity = bytes;
437  _data = (bytes > 0) ? _mr.allocate_async(bytes, stream()) : nullptr;
438  }
439 
449  void deallocate_async() noexcept
450  {
451  if (capacity() > 0) { _mr.deallocate_async(data(), capacity(), stream()); }
452  _size = 0;
453  _capacity = 0;
454  _data = nullptr;
455  }
456 
469  void copy_async(void const* source, std::size_t bytes)
470  {
471  if (bytes > 0) {
472  RMM_EXPECTS(nullptr != source, "Invalid copy from nullptr.");
473  RMM_EXPECTS(nullptr != _data, "Invalid copy to nullptr.");
474 
475  RMM_CUDA_TRY(cudaMemcpyAsync(_data, source, bytes, cudaMemcpyDefault, stream().value()));
476  }
477  }
478 };
479  // end of group
481 } // namespace RMM_NAMESPACE
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:39
constexpr cudaStream_t value() const noexcept
Get the wrapped stream.
Definition: cuda_stream_view.hpp:73
RAII construct for device memory allocation.
Definition: device_buffer.hpp:83
cuda_stream_view stream() const noexcept
The stream most recently specified for allocation/deallocation.
Definition: device_buffer.hpp:393
void resize(std::size_t new_size, cuda_stream_view stream)
Resize the device memory allocation.
Definition: device_buffer.hpp:307
void * data() noexcept
Pointer to the device memory allocation.
Definition: device_buffer.hpp:356
~device_buffer() noexcept
Destroy the device buffer object.
Definition: device_buffer.hpp:244
device_buffer & operator=(device_buffer &&other) noexcept
Move assignment operator moves the contents from other.
Definition: device_buffer.hpp:215
device_buffer()
Default constructor creates an empty device_buffer
Definition: device_buffer.hpp:96
std::size_t capacity() const noexcept
Returns actual size in bytes of device memory allocation.
Definition: device_buffer.hpp:388
void const * data() const noexcept
Const pointer to the device memory allocation.
Definition: device_buffer.hpp:351
void reserve(std::size_t new_capacity, cuda_stream_view stream)
Increase the capacity of the device memory allocation.
Definition: device_buffer.hpp:269
device_buffer(std::size_t size, cuda_stream_view stream, device_async_resource_ref mr=mr::get_current_device_resource_ref())
Constructs a new device buffer of size uninitialized bytes.
Definition: device_buffer.hpp:108
void set_stream(cuda_stream_view stream) noexcept
Sets the stream to be used for deallocation.
Definition: device_buffer.hpp:406
std::size_t size() const noexcept
The number of bytes.
Definition: device_buffer.hpp:361
device_buffer(void const *source_data, std::size_t size, cuda_stream_view stream, device_async_resource_ref mr=mr::get_current_device_resource_ref())
Construct a new device buffer by copying from a raw pointer to an existing host or device memory allo...
Definition: device_buffer.hpp:136
device_buffer(device_buffer &&other) noexcept
Constructs a new device_buffer by moving the contents of another device_buffer into the newly constru...
Definition: device_buffer.hpp:186
device_buffer(device_buffer const &other, cuda_stream_view stream, device_async_resource_ref mr=mr::get_current_device_resource_ref())
Construct a new device_buffer by deep copying the contents of another device_buffer,...
Definition: device_buffer.hpp:168
void shrink_to_fit(cuda_stream_view stream)
Forces the deallocation of unused memory.
Definition: device_buffer.hpp:335
std::int64_t ssize() const noexcept
The signed number of bytes.
Definition: device_buffer.hpp:366
bool is_empty() const noexcept
Whether or not the buffer currently holds any data.
Definition: device_buffer.hpp:379
rmm::device_async_resource_ref memory_resource() const noexcept
The resource used to allocate and deallocate.
Definition: device_buffer.hpp:411
cuda_device_id get_current_cuda_device()
Returns a cuda_device_id for the current device.
Definition: cuda_device.hpp:99
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:41
device_async_resource_ref get_current_device_resource_ref()
Get the device_async_resource_ref for the current device.
Definition: per_device_resource.hpp:411
Management of per-device device_memory_resources.
Strong type for a CUDA device identifier.
Definition: cuda_device.hpp:41
RAII class that sets the current CUDA device to the specified device on construction and restores the...
Definition: cuda_device.hpp:151