device_buffer.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2022, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <rmm/cuda_stream_view.hpp>
19 #include <rmm/detail/error.hpp>
20 #include <rmm/mr/device/device_memory_resource.hpp>
22 
23 #include <cuda_runtime_api.h>
24 
25 #include <cassert>
26 #include <cstddef>
27 #include <stdexcept>
28 #include <utility>
29 
30 namespace rmm {
78  public:
79  // The copy constructor and copy assignment operator without a stream are deleted because they
80  // provide no way to specify an explicit stream
81  device_buffer(device_buffer const& other) = delete;
82  device_buffer& operator=(device_buffer const& other) = delete;
83 
87  // Note: we cannot use `device_buffer() = default;` because nvcc implicitly adds
88  // `__host__ __device__` specifiers to the defaulted constructor when it is called within the
89  // context of both host and device functions. Specifically, the `cudf::type_dispatcher` is a host-
90  // device function. This causes warnings/errors because this ctor invokes host-only functions.
91  device_buffer() : _mr{rmm::mr::get_current_device_resource()} {}
92 
103  explicit device_buffer(std::size_t size,
105  mr::device_memory_resource* mr = mr::get_current_device_resource())
106  : _stream{stream}, _mr{mr}
107  {
108  allocate_async(size);
109  }
110 
130  device_buffer(void const* source_data,
131  std::size_t size,
133  mr::device_memory_resource* mr = mr::get_current_device_resource())
134  : _stream{stream}, _mr{mr}
135  {
136  allocate_async(size);
137  copy_async(source_data, size);
138  }
139 
163  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
164  : device_buffer{other.data(), other.size(), stream, mr}
165  {
166  }
167 
181  device_buffer(device_buffer&& other) noexcept
182  : _data{other._data},
183  _size{other._size},
184  _capacity{other._capacity},
185  _stream{other.stream()},
186  _mr{other._mr}
187  {
188  other._data = nullptr;
189  other._size = 0;
190  other._capacity = 0;
191  other.set_stream(cuda_stream_view{});
192  }
193 
207  {
208  if (&other != this) {
209  deallocate_async();
210 
211  _data = other._data;
212  _size = other._size;
213  _capacity = other._capacity;
214  set_stream(other.stream());
215  _mr = other._mr;
216 
217  other._data = nullptr;
218  other._size = 0;
219  other._capacity = 0;
220  other.set_stream(cuda_stream_view{});
221  }
222  return *this;
223  }
224 
232  ~device_buffer() noexcept
233  {
234  deallocate_async();
235  _mr = nullptr;
236  _stream = cuda_stream_view{};
237  }
238 
264  void resize(std::size_t new_size, cuda_stream_view stream)
265  {
267  // If the requested size is smaller than the current capacity, just update
268  // the size without any allocations
269  if (new_size <= capacity()) {
270  _size = new_size;
271  } else {
272  void* const new_data = _mr->allocate(new_size, this->stream());
273  RMM_CUDA_TRY(
274  cudaMemcpyAsync(new_data, data(), size(), cudaMemcpyDefault, this->stream().value()));
275  deallocate_async();
276  _data = new_data;
277  _size = new_size;
278  _capacity = new_size;
279  }
280  }
281 
296  {
298  if (size() != capacity()) {
299  // Invoke copy ctor on self which only copies `[0, size())` and swap it
300  // with self. The temporary `device_buffer` will hold the old contents
301  // which will then be destroyed
302  auto tmp = device_buffer{*this, stream, _mr};
303  std::swap(tmp, *this);
304  }
305  }
306 
310  [[nodiscard]] void const* data() const noexcept { return _data; }
311 
315  void* data() noexcept { return _data; }
316 
320  [[nodiscard]] std::size_t size() const noexcept { return _size; }
321 
325  [[nodiscard]] std::int64_t ssize() const noexcept
326  {
327  assert(size() < static_cast<std::size_t>(std::numeric_limits<int64_t>::max()) &&
328  "Size overflows signed integer");
329  return static_cast<int64_t>(size());
330  }
331 
339  [[nodiscard]] bool is_empty() const noexcept { return 0 == size(); }
340 
346  [[nodiscard]] std::size_t capacity() const noexcept { return _capacity; }
347 
351  [[nodiscard]] cuda_stream_view stream() const noexcept { return _stream; }
352 
362  void set_stream(cuda_stream_view stream) noexcept { _stream = stream; }
363 
368  [[nodiscard]] mr::device_memory_resource* memory_resource() const noexcept { return _mr; }
369 
370  private:
371  void* _data{nullptr};
372  std::size_t _size{};
373  std::size_t _capacity{};
374  cuda_stream_view _stream{};
375  mr::device_memory_resource* _mr{
376  mr::get_current_device_resource()};
377 
388  void allocate_async(std::size_t bytes)
389  {
390  _size = bytes;
391  _capacity = bytes;
392  _data = (bytes > 0) ? memory_resource()->allocate(bytes, stream()) : nullptr;
393  }
394 
404  void deallocate_async() noexcept
405  {
406  if (capacity() > 0) { memory_resource()->deallocate(data(), capacity(), stream()); }
407  _size = 0;
408  _capacity = 0;
409  _data = nullptr;
410  }
411 
424  void copy_async(void const* source, std::size_t bytes)
425  {
426  if (bytes > 0) {
427  RMM_EXPECTS(nullptr != source, "Invalid copy from nullptr.");
428 
429  RMM_CUDA_TRY(cudaMemcpyAsync(_data, source, bytes, cudaMemcpyDefault, stream().value()));
430  }
431  }
432 };
433 } // namespace rmm
rmm::device_buffer::memory_resource
mr::device_memory_resource * memory_resource() const noexcept
Returns pointer to the memory resource used to allocate and deallocate the device memory.
Definition: device_buffer.hpp:368
rmm::device_buffer::shrink_to_fit
void shrink_to_fit(cuda_stream_view stream)
Forces the deallocation of unused memory.
Definition: device_buffer.hpp:295
rmm::mr::device_memory_resource::allocate
void * allocate(std::size_t bytes, cuda_stream_view stream=cuda_stream_view{})
Allocates memory of size at least bytes.
Definition: device_memory_resource.hpp:106
rmm::device_buffer::device_buffer
device_buffer(void const *source_data, std::size_t size, cuda_stream_view stream, mr::device_memory_resource *mr=mr::get_current_device_resource())
Construct a new device buffer by copying from a raw pointer to an existing host or device memory allo...
Definition: device_buffer.hpp:130
per_device_resource.hpp
Management of per-device device_memory_resources.
rmm::cuda_stream_view
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:34
rmm::device_buffer::size
std::size_t size() const noexcept
Returns the number of bytes.
Definition: device_buffer.hpp:320
rmm::device_buffer::device_buffer
device_buffer()
Default constructor creates an empty device_buffer
Definition: device_buffer.hpp:91
rmm::mr::device_memory_resource::deallocate
void deallocate(void *ptr, std::size_t bytes, cuda_stream_view stream=cuda_stream_view{})
Deallocate memory pointed to by p.
Definition: device_memory_resource.hpp:129
rmm::device_buffer::device_buffer
device_buffer(device_buffer const &other, cuda_stream_view stream, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Construct a new device_buffer by deep copying the contents of another device_buffer,...
Definition: device_buffer.hpp:161
rmm::device_buffer::device_buffer
device_buffer(std::size_t size, cuda_stream_view stream, mr::device_memory_resource *mr=mr::get_current_device_resource())
Constructs a new device buffer of size uninitialized bytes.
Definition: device_buffer.hpp:103
rmm::device_buffer::ssize
std::int64_t ssize() const noexcept
Returns the signed number of bytes.
Definition: device_buffer.hpp:325
rmm::device_buffer
Definition: device_buffer.hpp:77
rmm::device_buffer::set_stream
void set_stream(cuda_stream_view stream) noexcept
Sets the stream to be used for deallocation.
Definition: device_buffer.hpp:362
rmm::device_buffer::data
void * data() noexcept
Returns raw pointer to underlying device memory allocation.
Definition: device_buffer.hpp:315
rmm::device_buffer::stream
cuda_stream_view stream() const noexcept
Returns stream most recently specified for allocation/deallocation.
Definition: device_buffer.hpp:351
rmm::device_buffer::resize
void resize(std::size_t new_size, cuda_stream_view stream)
Resize the device memory allocation.
Definition: device_buffer.hpp:264
rmm::device_buffer::is_empty
bool is_empty() const noexcept
returns the number of bytes that can be held in currently allocated storage.
Definition: device_buffer.hpp:339
rmm::device_buffer::data
void const * data() const noexcept
Returns raw pointer to underlying device memory allocation.
Definition: device_buffer.hpp:310
rmm::mr::device_memory_resource
Base class for all libcudf device memory allocation.
Definition: device_memory_resource.hpp:82
rmm::device_buffer::capacity
std::size_t capacity() const noexcept
Returns actual size in bytes of device memory allocation.
Definition: device_buffer.hpp:346
rmm::device_buffer::operator=
device_buffer & operator=(device_buffer &&other) noexcept
Move assignment operator moves the contents from other.
Definition: device_buffer.hpp:206
rmm::device_buffer::~device_buffer
~device_buffer() noexcept
Destroy the device buffer object.
Definition: device_buffer.hpp:232
rmm::device_buffer::device_buffer
device_buffer(device_buffer &&other) noexcept
Constructs a new device_buffer by moving the contents of another device_buffer into the newly constru...
Definition: device_buffer.hpp:181