All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
device_uvector.hpp
Go to the documentation of this file.
/*
 * Copyright (c) 2020-2025, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #pragma once
18 
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/detail/exec_check_disable.hpp>
#include <rmm/detail/export.hpp>
#include <rmm/device_buffer.hpp>
// Provides mr::get_current_device_resource_ref(), the default for the constructors' `mr` argument.
// NOTE(review): directory assumed to be rmm/mr/device/ -- confirm against the RMM version in use.
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <cuda/memory_resource>

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <type_traits>
#include <utility>
#include <vector>
31 
32 namespace RMM_NAMESPACE {
76 template <typename T>
78  static_assert(std::is_trivially_copyable<T>::value,
79  "device_uvector only supports types that are trivially copyable.");
80 
81  public:
82  using value_type = T;
83  using size_type = std::size_t;
84  using reference = value_type&;
85  using const_reference = value_type const&;
87  using pointer = value_type*;
88  using const_pointer = value_type const*;
89  using iterator = pointer;
91 
92  RMM_EXEC_CHECK_DISABLE
93  ~device_uvector() = default;
94 
95  RMM_EXEC_CHECK_DISABLE
96  device_uvector(device_uvector&&) noexcept = default;
97 
98  RMM_EXEC_CHECK_DISABLE
99  device_uvector& operator=(device_uvector&&) noexcept =
100  default;
101 
105  device_uvector(device_uvector const&) = delete;
106 
110  device_uvector& operator=(device_uvector const&) = delete;
111 
115  device_uvector() = delete;
116 
128  explicit device_uvector(std::size_t size,
129  cuda_stream_view stream,
131  : _storage{elements_to_bytes(size), stream, mr}
132  {
133  }
134 
144  explicit device_uvector(device_uvector const& other,
145  cuda_stream_view stream,
147  : _storage{other._storage, stream, mr}
148  {
149  }
150 
159  [[nodiscard]] pointer element_ptr(std::size_t element_index) noexcept
160  {
161  assert(element_index < size());
162  return data() + element_index;
163  }
164 
173  [[nodiscard]] const_pointer element_ptr(std::size_t element_index) const noexcept
174  {
175  assert(element_index < size());
176  return data() + element_index;
177  }
178 
215  void set_element_async(std::size_t element_index,
216  value_type const& value,
217  cuda_stream_view stream)
218  {
219  RMM_EXPECTS(
220  element_index < size(), "Attempt to access out of bounds element.", rmm::out_of_range);
221 
222  if constexpr (std::is_same<value_type, bool>::value) {
223  RMM_CUDA_TRY(
224  cudaMemsetAsync(element_ptr(element_index), value, sizeof(value), stream.value()));
225  return;
226  }
227 
228  if constexpr (std::is_fundamental<value_type>::value) {
229  if (value == value_type{0}) {
230  set_element_to_zero_async(element_index, stream);
231  return;
232  }
233  }
234 
235  RMM_CUDA_TRY(cudaMemcpyAsync(
236  element_ptr(element_index), &value, sizeof(value), cudaMemcpyDefault, stream.value()));
237  }
238 
239  // We delete the r-value reference overload to prevent asynchronously copying from a literal or
240  // implicit temporary value after it is deleted or goes out of scope.
241  void set_element_async(std::size_t, value_type const&&, cuda_stream_view) = delete;
242 
265  void set_element_to_zero_async(std::size_t element_index, cuda_stream_view stream)
266  {
267  RMM_EXPECTS(
268  element_index < size(), "Attempt to access out of bounds element.", rmm::out_of_range);
269  RMM_CUDA_TRY(
270  cudaMemsetAsync(element_ptr(element_index), 0, sizeof(value_type), stream.value()));
271  }
272 
302  void set_element(std::size_t element_index, T const& value, cuda_stream_view stream)
303  {
304  set_element_async(element_index, value, stream);
305  stream.synchronize_no_throw();
306  }
307 
320  [[nodiscard]] value_type element(std::size_t element_index, cuda_stream_view stream) const
321  {
322  RMM_EXPECTS(
323  element_index < size(), "Attempt to access out of bounds element.", rmm::out_of_range);
324  value_type value;
325  RMM_CUDA_TRY(cudaMemcpyAsync(
326  &value, element_ptr(element_index), sizeof(value), cudaMemcpyDefault, stream.value()));
327  stream.synchronize();
328  return value;
329  }
330 
342  [[nodiscard]] value_type front_element(cuda_stream_view stream) const
343  {
344  return element(0, stream);
345  }
346 
358  [[nodiscard]] value_type back_element(cuda_stream_view stream) const
359  {
360  return element(size() - 1, stream);
361  }
362 
375  void reserve(std::size_t new_capacity, cuda_stream_view stream)
376  {
377  _storage.reserve(elements_to_bytes(new_capacity), stream);
378  }
379 
396  void resize(std::size_t new_size, cuda_stream_view stream)
397  {
398  _storage.resize(elements_to_bytes(new_size), stream);
399  }
400 
408  void shrink_to_fit(cuda_stream_view stream) { _storage.shrink_to_fit(stream); }
409 
415  device_buffer release() noexcept { return std::move(_storage); }
416 
423  [[nodiscard]] std::size_t capacity() const noexcept
424  {
425  return bytes_to_elements(_storage.capacity());
426  }
427 
436  [[nodiscard]] pointer data() noexcept { return static_cast<pointer>(_storage.data()); }
437 
446  [[nodiscard]] const_pointer data() const noexcept
447  {
448  return static_cast<const_pointer>(_storage.data());
449  }
450 
458  [[nodiscard]] iterator begin() noexcept { return data(); }
459 
467  [[nodiscard]] const_iterator cbegin() const noexcept { return data(); }
468 
476  [[nodiscard]] const_iterator begin() const noexcept { return cbegin(); }
477 
486  [[nodiscard]] iterator end() noexcept { return data() + size(); }
487 
496  [[nodiscard]] const_iterator cend() const noexcept { return data() + size(); }
497 
506  [[nodiscard]] const_iterator end() const noexcept { return cend(); }
507 
511  [[nodiscard]] std::size_t size() const noexcept { return bytes_to_elements(_storage.size()); }
512 
516  [[nodiscard]] std::int64_t ssize() const noexcept
517  {
518  assert(size() < static_cast<std::size_t>(std::numeric_limits<int64_t>::max()) &&
519  "Size overflows signed integer");
520  return static_cast<int64_t>(size());
521  }
522 
526  [[nodiscard]] bool is_empty() const noexcept { return size() == 0; }
527 
532  [[nodiscard]] rmm::device_async_resource_ref memory_resource() const noexcept
533  {
534  return _storage.memory_resource();
535  }
536 
540  [[nodiscard]] cuda_stream_view stream() const noexcept { return _storage.stream(); }
541 
553  void set_stream(cuda_stream_view stream) noexcept { _storage.set_stream(stream); }
554 
555  private:
556  device_buffer _storage{};
557 
558  [[nodiscard]] std::size_t constexpr elements_to_bytes(std::size_t num_elements) const noexcept
559  {
560  return num_elements * sizeof(value_type);
561  }
562 
563  [[nodiscard]] std::size_t constexpr bytes_to_elements(std::size_t num_bytes) const noexcept
564  {
565  return num_bytes / sizeof(value_type);
566  }
567 };
568  // end of group
570 } // namespace RMM_NAMESPACE
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:41
constexpr cudaStream_t value() const noexcept
Get the wrapped stream.
Definition: cuda_stream_view.hpp:75
void synchronize() const
Synchronize the viewed CUDA stream.
Definition: cuda_stream_view.hpp:108
void synchronize_no_throw() const noexcept
Synchronize the viewed CUDA stream. Does not throw if there is an error.
Definition: cuda_stream_view.hpp:115
RAII construct for device memory allocation.
Definition: device_buffer.hpp:84
An uninitialized vector of elements in device memory.
Definition: device_uvector.hpp:77
const_iterator cend() const noexcept
Returns a const_iterator to the element following the last element of the vector.
Definition: device_uvector.hpp:496
std::size_t capacity() const noexcept
Returns the number of elements that can be held in currently allocated storage.
Definition: device_uvector.hpp:423
void resize(std::size_t new_size, cuda_stream_view stream)
Resizes the vector to contain new_size elements.
Definition: device_uvector.hpp:396
value_type * pointer
The type of the pointer returned by data()
Definition: device_uvector.hpp:87
const_pointer element_ptr(std::size_t element_index) const noexcept
Returns pointer to the specified element.
Definition: device_uvector.hpp:173
bool is_empty() const noexcept
true if the vector contains no elements, i.e. size() == 0
Definition: device_uvector.hpp:526
const_pointer data() const noexcept
Returns const pointer to underlying device storage.
Definition: device_uvector.hpp:446
std::size_t size() const noexcept
The number of elements in the vector.
Definition: device_uvector.hpp:511
pointer data() noexcept
Returns pointer to underlying device storage.
Definition: device_uvector.hpp:436
void shrink_to_fit(cuda_stream_view stream)
Forces deallocation of unused device memory.
Definition: device_uvector.hpp:408
iterator end() noexcept
Returns an iterator to the element following the last element of the vector.
Definition: device_uvector.hpp:486
std::size_t size_type
The type used for the size of the vector.
Definition: device_uvector.hpp:83
pointer element_ptr(std::size_t element_index) noexcept
Returns pointer to the specified element.
Definition: device_uvector.hpp:159
std::int64_t ssize() const noexcept
The signed number of elements in the vector.
Definition: device_uvector.hpp:516
T value_type
T; stored value type.
Definition: device_uvector.hpp:82
const_iterator cbegin() const noexcept
Returns a const_iterator to the first element.
Definition: device_uvector.hpp:467
value_type back_element(cuda_stream_view stream) const
Returns the last element.
Definition: device_uvector.hpp:358
void set_element_to_zero_async(std::size_t element_index, cuda_stream_view stream)
Asynchronously sets the specified element to zero in device memory.
Definition: device_uvector.hpp:265
const_pointer const_iterator
The type of the const iterator returned by cbegin()
Definition: device_uvector.hpp:90
device_buffer release() noexcept
Release ownership of device memory storage.
Definition: device_uvector.hpp:415
void set_element_async(std::size_t element_index, value_type const &value, cuda_stream_view stream)
Performs an asynchronous copy of value to the specified element in device memory.
Definition: device_uvector.hpp:215
device_uvector(device_uvector &&) noexcept=default
Default move constructor.
pointer iterator
The type of the iterator returned by begin()
Definition: device_uvector.hpp:89
value_type & reference
value_type&; reference type returned by operator[](size_type)
Definition: device_uvector.hpp:84
const_iterator end() const noexcept
Returns an iterator to the element following the last element of the vector.
Definition: device_uvector.hpp:506
void reserve(std::size_t new_capacity, cuda_stream_view stream)
Increases the capacity of the vector to new_capacity elements.
Definition: device_uvector.hpp:375
value_type element(std::size_t element_index, cuda_stream_view stream) const
Returns the specified element from device memory.
Definition: device_uvector.hpp:320
value_type front_element(cuda_stream_view stream) const
Returns the first element.
Definition: device_uvector.hpp:342
value_type const * const_pointer
The type of the pointer returned by data() const.
Definition: device_uvector.hpp:88
device_uvector(device_uvector const &other, cuda_stream_view stream, device_async_resource_ref mr=mr::get_current_device_resource_ref())
Construct a new device_uvector by deep copying the contents of another device_uvector.
Definition: device_uvector.hpp:144
value_type const & const_reference
Definition: device_uvector.hpp:86
void set_element(std::size_t element_index, T const &value, cuda_stream_view stream)
Performs a synchronous copy of value to the specified element in device memory.
Definition: device_uvector.hpp:302
const_iterator begin() const noexcept
Returns a const_iterator to the first element.
Definition: device_uvector.hpp:476
iterator begin() noexcept
Returns an iterator to the first element.
Definition: device_uvector.hpp:458
Exception thrown when attempting to access outside of a defined range.
Definition: error.hpp:110
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:41
device_async_resource_ref get_current_device_resource_ref()
Get the device_async_resource_ref for the current device.
Definition: per_device_resource.hpp:411
Management of per-device device_memory_resources.