All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
device_buffer.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <rmm/cuda_device.hpp>
19 #include <rmm/cuda_stream_view.hpp>
20 #include <rmm/detail/error.hpp>
21 #include <rmm/detail/export.hpp>
23 #include <rmm/resource_ref.hpp>
24 
25 #include <cuda/memory_resource>
26 #include <cuda_runtime_api.h>
27 
28 #include <cassert>
29 #include <cstddef>
30 #include <stdexcept>
31 #include <utility>
32 
33 namespace RMM_NAMESPACE {
85  public:
86  // The copy constructor and copy assignment operator without a stream are deleted because they
87  // provide no way to specify an explicit stream
88  device_buffer(device_buffer const& other) = delete;
89  device_buffer& operator=(device_buffer const& other) = delete;
90 
94  // Note: we cannot use `device_buffer() = default;` because nvcc implicitly adds
95  // `__host__ __device__` specifiers to the defaulted constructor when it is called within the
96  // context of both host and device functions. Specifically, the `cudf::type_dispatcher` is a host-
97  // device function. This causes warnings/errors because this ctor invokes host-only functions.
99 
110  explicit device_buffer(std::size_t size,
111  cuda_stream_view stream,
113  : _stream{stream}, _mr{mr}
114  {
115  cuda_set_device_raii dev{_device};
116  allocate_async(size);
117  }
118 
138  device_buffer(void const* source_data,
139  std::size_t size,
140  cuda_stream_view stream,
142  : _stream{stream}, _mr{mr}
143  {
144  cuda_set_device_raii dev{_device};
145  allocate_async(size);
146  copy_async(source_data, size);
147  }
148 
171  cuda_stream_view stream,
173  : device_buffer{other.data(), other.size(), stream, mr}
174  {
175  }
176 
188  device_buffer(device_buffer&& other) noexcept
189  : _data{other._data},
190  _size{other._size},
191  _capacity{other._capacity},
192  _stream{other.stream()},
193  _mr{other._mr},
194  _device{other._device}
195  {
196  other._data = nullptr;
197  other._size = 0;
198  other._capacity = 0;
199  other.set_stream(cuda_stream_view{});
200  other._device = cuda_device_id{-1};
201  }
202 
218  {
219  if (&other != this) {
220  cuda_set_device_raii dev{_device};
221  deallocate_async();
222 
223  _data = other._data;
224  _size = other._size;
225  _capacity = other._capacity;
226  set_stream(other.stream());
227  _mr = other._mr;
228  _device = other._device;
229 
230  other._data = nullptr;
231  other._size = 0;
232  other._capacity = 0;
233  other.set_stream(cuda_stream_view{});
234  other._device = cuda_device_id{-1};
235  }
236  return *this;
237  }
238 
246  ~device_buffer() noexcept
247  {
248  cuda_set_device_raii dev{_device};
249  deallocate_async();
250  _stream = cuda_stream_view{};
251  }
252 
271  void reserve(std::size_t new_capacity, cuda_stream_view stream)
272  {
273  set_stream(stream);
274  if (new_capacity > capacity()) {
275  cuda_set_device_raii dev{_device};
276  auto tmp = device_buffer{new_capacity, stream, _mr};
277  auto const old_size = size();
278  RMM_CUDA_TRY(cudaMemcpyAsync(tmp.data(), data(), size(), cudaMemcpyDefault, stream.value()));
279  *this = std::move(tmp);
280  _size = old_size;
281  }
282  }
283 
309  void resize(std::size_t new_size, cuda_stream_view stream)
310  {
311  set_stream(stream);
312  // If the requested size is smaller than the current capacity, just update
313  // the size without any allocations
314  if (new_size <= capacity()) {
315  _size = new_size;
316  } else {
317  cuda_set_device_raii dev{_device};
318  auto tmp = device_buffer{new_size, stream, _mr};
319  RMM_CUDA_TRY(cudaMemcpyAsync(tmp.data(), data(), size(), cudaMemcpyDefault, stream.value()));
320  *this = std::move(tmp);
321  }
322  }
323 
338  {
339  set_stream(stream);
340  if (size() != capacity()) {
341  cuda_set_device_raii dev{_device};
342  // Invoke copy ctor on self which only copies `[0, size())` and swap it
343  // with self. The temporary `device_buffer` will hold the old contents
344  // which will then be destroyed
345  auto tmp = device_buffer{*this, stream, _mr};
346  std::swap(tmp, *this);
347  }
348  }
349 
353  [[nodiscard]] void const* data() const noexcept { return _data; }
354 
358  void* data() noexcept { return _data; }
359 
363  [[nodiscard]] std::size_t size() const noexcept { return _size; }
364 
368  [[nodiscard]] std::int64_t ssize() const noexcept
369  {
370  assert(size() < static_cast<std::size_t>(std::numeric_limits<int64_t>::max()) &&
371  "Size overflows signed integer");
372  return static_cast<int64_t>(size());
373  }
374 
381  [[nodiscard]] bool is_empty() const noexcept { return 0 == size(); }
382 
390  [[nodiscard]] std::size_t capacity() const noexcept { return _capacity; }
391 
395  [[nodiscard]] cuda_stream_view stream() const noexcept { return _stream; }
396 
408  void set_stream(cuda_stream_view stream) noexcept { _stream = stream; }
409 
413  [[nodiscard]] rmm::device_async_resource_ref memory_resource() const noexcept { return _mr; }
414 
415  private:
416  void* _data{nullptr};
417  std::size_t _size{};
418  std::size_t _capacity{};
419  cuda_stream_view _stream{};
420 
424  cuda_device_id _device{get_current_cuda_device()};
425 
435  void allocate_async(std::size_t bytes)
436  {
437  _size = bytes;
438  _capacity = bytes;
439  _data = (bytes > 0) ? _mr.allocate_async(bytes, stream()) : nullptr;
440  }
441 
451  void deallocate_async() noexcept
452  {
453  if (capacity() > 0) { _mr.deallocate_async(data(), capacity(), stream()); }
454  _size = 0;
455  _capacity = 0;
456  _data = nullptr;
457  }
458 
471  void copy_async(void const* source, std::size_t bytes)
472  {
473  if (bytes > 0) {
474  RMM_EXPECTS(nullptr != source, "Invalid copy from nullptr.");
475  RMM_EXPECTS(nullptr != _data, "Invalid copy to nullptr.");
476 
477  RMM_CUDA_TRY(cudaMemcpyAsync(_data, source, bytes, cudaMemcpyDefault, stream().value()));
478  }
479  }
480 };
481  // end of group
483 } // namespace RMM_NAMESPACE
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:41
constexpr cudaStream_t value() const noexcept
Get the wrapped stream.
Definition: cuda_stream_view.hpp:75
RAII construct for device memory allocation.
Definition: device_buffer.hpp:84
cuda_stream_view stream() const noexcept
The stream most recently specified for allocation/deallocation.
Definition: device_buffer.hpp:395
void resize(std::size_t new_size, cuda_stream_view stream)
Resize the device memory allocation.
Definition: device_buffer.hpp:309
void * data() noexcept
Pointer to the device memory allocation.
Definition: device_buffer.hpp:358
~device_buffer() noexcept
Destroy the device buffer object.
Definition: device_buffer.hpp:246
device_buffer & operator=(device_buffer &&other) noexcept
Move assignment operator moves the contents from other.
Definition: device_buffer.hpp:217
device_buffer()
Default constructor creates an empty device_buffer
Definition: device_buffer.hpp:98
std::size_t capacity() const noexcept
Returns actual size in bytes of device memory allocation.
Definition: device_buffer.hpp:390
void const * data() const noexcept
Const pointer to the device memory allocation.
Definition: device_buffer.hpp:353
void reserve(std::size_t new_capacity, cuda_stream_view stream)
Increase the capacity of the device memory allocation.
Definition: device_buffer.hpp:271
device_buffer(std::size_t size, cuda_stream_view stream, device_async_resource_ref mr=mr::get_current_device_resource_ref())
Constructs a new device buffer of size uninitialized bytes.
Definition: device_buffer.hpp:110
void set_stream(cuda_stream_view stream) noexcept
Sets the stream to be used for deallocation.
Definition: device_buffer.hpp:408
std::size_t size() const noexcept
The number of bytes.
Definition: device_buffer.hpp:363
device_buffer(void const *source_data, std::size_t size, cuda_stream_view stream, device_async_resource_ref mr=mr::get_current_device_resource_ref())
Construct a new device buffer by copying from a raw pointer to an existing host or device memory allo...
Definition: device_buffer.hpp:138
device_buffer(device_buffer &&other) noexcept
Constructs a new device_buffer by moving the contents of another device_buffer into the newly constru...
Definition: device_buffer.hpp:188
device_buffer(device_buffer const &other, cuda_stream_view stream, device_async_resource_ref mr=mr::get_current_device_resource_ref())
Construct a new device_buffer by deep copying the contents of another device_buffer,...
Definition: device_buffer.hpp:170
void shrink_to_fit(cuda_stream_view stream)
Forces the deallocation of unused memory.
Definition: device_buffer.hpp:337
std::int64_t ssize() const noexcept
The signed number of bytes.
Definition: device_buffer.hpp:368
bool is_empty() const noexcept
Whether or not the buffer currently holds any data.
Definition: device_buffer.hpp:381
rmm::device_async_resource_ref memory_resource() const noexcept
The resource used to allocate and deallocate.
Definition: device_buffer.hpp:413
cuda_device_id get_current_cuda_device()
Returns a cuda_device_id for the current device.
Definition: cuda_device.hpp:96
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:41
device_async_resource_ref get_current_device_resource_ref()
Get the device_async_resource_ref for the current device.
Definition: per_device_resource.hpp:411
Management of per-device device_memory_resources.
Strong type for a CUDA device identifier.
Definition: cuda_device.hpp:38
RAII class that sets the current CUDA device to the specified device on construction and restores the...
Definition: cuda_device.hpp:148