device_buffer.hpp
/*
 * Copyright (c) 2019-2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once

#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

#include <cuda_runtime_api.h>

#include <cassert>
#include <cstddef>
#include <stdexcept>
#include <utility>

#include <cuda/memory_resource>

namespace rmm {
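/**
 * @addtogroup data_containers
 * @{
 */

/**
 * @brief RAII construct for device memory allocation.
 */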
class device_buffer {
  using async_resource_ref = cuda::mr::async_resource_ref<cuda::mr::device_accessible>;

 public:
  // The copy constructor and copy assignment operator without a stream are deleted because they
  // provide no way to specify an explicit stream
  device_buffer(device_buffer const& other) = delete;
  device_buffer& operator=(device_buffer const& other) = delete;

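  /**
   * @brief Default constructor creates an empty `device_buffer`.
   */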
  // Note: we cannot use `device_buffer() = default;` because nvcc implicitly adds
  // `__host__ __device__` specifiers to the defaulted constructor when it is called within the
  // context of both host and device functions. Specifically, the `cudf::type_dispatcher` is a host-
  // device function. This causes warnings/errors because this ctor invokes host-only functions.
  device_buffer() {}

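  /**
   * @brief Constructs a new device buffer of `size` uninitialized bytes.
   */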
  explicit device_buffer(std::size_t size,
                         cuda_stream_view stream,
                         async_resource_ref mr = mr::get_current_device_resource())
    : _stream{stream}, _mr{mr}
  {
    cuda_set_device_raii dev{_device};
    allocate_async(size);
  }

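  /**
   * @brief Construct a new device buffer by copying from a raw pointer to an existing host or
   * device memory allocation.
   */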
  device_buffer(void const* source_data,
                std::size_t size,
                cuda_stream_view stream,
                async_resource_ref mr = rmm::mr::get_current_device_resource())
    : _stream{stream}, _mr{mr}
  {
    cuda_set_device_raii dev{_device};
    allocate_async(size);
    copy_async(source_data, size);
  }

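  /**
   * @brief Construct a new device_buffer by deep copying the contents of another device_buffer,
   * using the specified stream and memory resource.
   */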
  device_buffer(device_buffer const& other,
                cuda_stream_view stream,
                async_resource_ref mr = rmm::mr::get_current_device_resource())
    : device_buffer{other.data(), other.size(), stream, mr}
  {
  }

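  /**
   * @brief Constructs a new device_buffer by moving the contents of another device_buffer into
   * the newly constructed one.
   */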
  device_buffer(device_buffer&& other) noexcept
    : _data{other._data},
      _size{other._size},
      _capacity{other._capacity},
      _stream{other.stream()},
      _mr{other._mr},
      _device{other._device}
  {
    other._data = nullptr;
    other._size = 0;
    other._capacity = 0;
    other.set_stream(cuda_stream_view{});
    other._device = cuda_device_id{-1};
  }

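  /**
   * @brief Move assignment operator moves the contents from `other`.
   */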
  device_buffer& operator=(device_buffer&& other) noexcept
  {
    if (&other != this) {
      cuda_set_device_raii dev{_device};
      deallocate_async();

      _data = other._data;
      _size = other._size;
      _capacity = other._capacity;
      set_stream(other.stream());
      _mr = other._mr;
      _device = other._device;

      other._data = nullptr;
      other._size = 0;
      other._capacity = 0;
      other.set_stream(cuda_stream_view{});
      other._device = cuda_device_id{-1};
    }
    return *this;
  }

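  /**
   * @brief Destroy the device buffer object.
   */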
  ~device_buffer() noexcept
  {
    cuda_set_device_raii dev{_device};
    deallocate_async();
    _stream = cuda_stream_view{};
  }

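  /**
   * @brief Increase the capacity of the device memory allocation.
   */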
  void reserve(std::size_t new_capacity, cuda_stream_view stream)
  {
    set_stream(stream);
    if (new_capacity > capacity()) {
      cuda_set_device_raii dev{_device};
      auto tmp = device_buffer{new_capacity, stream, _mr};
      auto const old_size = size();
      RMM_CUDA_TRY(cudaMemcpyAsync(tmp.data(), data(), size(), cudaMemcpyDefault, stream.value()));
      *this = std::move(tmp);
      _size = old_size;
    }
  }

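  /**
   * @brief Resize the device memory allocation.
   */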
  void resize(std::size_t new_size, cuda_stream_view stream)
  {
    set_stream(stream);
    // If the requested size is smaller than the current capacity, just update
    // the size without any allocations
    if (new_size <= capacity()) {
      _size = new_size;
    } else {
      cuda_set_device_raii dev{_device};
      auto tmp = device_buffer{new_size, stream, _mr};
      RMM_CUDA_TRY(cudaMemcpyAsync(tmp.data(), data(), size(), cudaMemcpyDefault, stream.value()));
      *this = std::move(tmp);
    }
  }

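  /**
   * @brief Forces the deallocation of unused memory.
   */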
  void shrink_to_fit(cuda_stream_view stream)
  {
    set_stream(stream);
    if (size() != capacity()) {
      cuda_set_device_raii dev{_device};
      // Invoke copy ctor on self which only copies `[0, size())` and swap it
      // with self. The temporary `device_buffer` will hold the old contents
      // which will then be destroyed
      auto tmp = device_buffer{*this, stream, _mr};
      std::swap(tmp, *this);
    }
  }

  /** @brief Const pointer to the device memory allocation. */
  [[nodiscard]] void const* data() const noexcept { return _data; }

  /** @brief Pointer to the device memory allocation. */
  void* data() noexcept { return _data; }

  /** @brief The number of bytes. */
  [[nodiscard]] std::size_t size() const noexcept { return _size; }

  /** @brief The signed number of bytes. */
  [[nodiscard]] std::int64_t ssize() const noexcept
  {
    assert(size() < static_cast<std::size_t>(std::numeric_limits<int64_t>::max()) &&
           "Size overflows signed integer");
    return static_cast<int64_t>(size());
  }

  /** @brief Whether or not the buffer currently holds any data. */
  [[nodiscard]] bool is_empty() const noexcept { return 0 == size(); }

  /** @brief Returns the actual size in bytes of the device memory allocation. */
  [[nodiscard]] std::size_t capacity() const noexcept { return _capacity; }

  /** @brief The stream most recently specified for allocation/deallocation. */
  [[nodiscard]] cuda_stream_view stream() const noexcept { return _stream; }

  /** @brief Sets the stream to be used for deallocation. */
  void set_stream(cuda_stream_view stream) noexcept { _stream = stream; }

  /** @brief The async_resource_ref used to allocate and deallocate. */
  [[nodiscard]] async_resource_ref memory_resource() const noexcept { return _mr; }

 private:
  void* _data{nullptr};
  std::size_t _size{};
  std::size_t _capacity{};
  cuda_stream_view _stream{};

  async_resource_ref _mr{
    rmm::mr::get_current_device_resource()};  ///< The memory resource used to
                                              ///< allocate/deallocate device memory
  cuda_device_id _device{get_current_cuda_device()};

  // Allocates `bytes` of memory from `_mr` on the stored stream and updates size/capacity.
  void allocate_async(std::size_t bytes)
  {
    _size = bytes;
    _capacity = bytes;
    _data = (bytes > 0) ? _mr.allocate_async(bytes, stream()) : nullptr;
  }

  // Deallocates any memory held by this buffer on the stored stream and resets size/capacity.
  void deallocate_async() noexcept
  {
    if (capacity() > 0) { _mr.deallocate_async(data(), capacity(), stream()); }
    _size = 0;
    _capacity = 0;
    _data = nullptr;
  }

  // Copies `bytes` from `source` (host or device memory) into `_data` on the stored stream.
  void copy_async(void const* source, std::size_t bytes)
  {
    if (bytes > 0) {
      RMM_EXPECTS(nullptr != source, "Invalid copy from nullptr.");
      RMM_EXPECTS(nullptr != _data, "Invalid copy to nullptr.");

      RMM_CUDA_TRY(cudaMemcpyAsync(_data, source, bytes, cudaMemcpyDefault, stream().value()));
    }
  }
};
/** @} */  // end of group

}  // namespace rmm
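A minimal usage sketch (not part of the header) based on the constructors and member functions above; it assumes a valid CUDA context and uses the current device resource because no `mr` argument is passed:

    #include <rmm/device_buffer.hpp>

    void example()
    {
      rmm::cuda_stream_view stream{};        // default CUDA stream
      rmm::device_buffer buf(128, stream);   // 128 uninitialized bytes allocated on `stream`
      buf.resize(64, stream);                // shrink: size becomes 64, capacity stays 128
      buf.shrink_to_fit(stream);             // reallocate so capacity matches size
    }  // `buf` deallocates asynchronously on its stored stream when destroyed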