device_uvector.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <rmm/cuda_stream_view.hpp>
20 #include <rmm/detail/error.hpp>
21 #include <rmm/detail/exec_check_disable.hpp>
22 #include <rmm/detail/export.hpp>
23 #include <rmm/device_buffer.hpp>
25 #include <rmm/resource_ref.hpp>
26 
27 #include <thrust/iterator/reverse_iterator.h>
28 
29 #include <cstddef>
30 #include <type_traits>
31 #include <utility>
32 
33 namespace RMM_NAMESPACE {
77 template <typename T>
79  static_assert(std::is_trivially_copyable_v<T>,
80  "device_uvector only supports types that are trivially copyable.");
81 
82  public:
83  using value_type = T;
84  using size_type = std::size_t;
85  using reference = value_type&;
86  using const_reference = value_type const&;
88  using pointer = value_type*;
89  using const_pointer = value_type const*;
90  using iterator = pointer;
93  thrust::reverse_iterator<iterator>;
95  thrust::reverse_iterator<const_iterator>;
96 
97  RMM_EXEC_CHECK_DISABLE
98  ~device_uvector() = default;
99 
100  RMM_EXEC_CHECK_DISABLE
101  device_uvector(device_uvector&&) noexcept = default;
102 
103  RMM_EXEC_CHECK_DISABLE
104  device_uvector& operator=(device_uvector&&) noexcept =
105  default;
106 
110  device_uvector(device_uvector const&) = delete;
111 
115  device_uvector& operator=(device_uvector const&) = delete;
116 
120  device_uvector() = delete;
121 
133  explicit device_uvector(size_type size,
134  cuda_stream_view stream,
136  : _storage{elements_to_bytes(size), stream, mr}
137  {
138  }
139 
149  explicit device_uvector(device_uvector const& other,
150  cuda_stream_view stream,
152  : _storage{other._storage, stream, mr}
153  {
154  }
155 
164  [[nodiscard]] pointer element_ptr(size_type element_index) noexcept
165  {
166  assert(element_index < size());
167  return data() + element_index;
168  }
169 
178  [[nodiscard]] const_pointer element_ptr(size_type element_index) const noexcept
179  {
180  assert(element_index < size());
181  return data() + element_index;
182  }
183 
220  void set_element_async(size_type element_index, value_type const& value, cuda_stream_view stream)
221  {
222  RMM_EXPECTS(
223  element_index < size(), "Attempt to access out of bounds element.", rmm::out_of_range);
224 
225  if constexpr (std::is_same_v<value_type, bool>) {
226  RMM_CUDA_TRY(
227  cudaMemsetAsync(element_ptr(element_index), value, sizeof(value), stream.value()));
228  return;
229  }
230 
231  if constexpr (std::is_fundamental_v<value_type>) {
232  if (value == value_type{0}) {
233  set_element_to_zero_async(element_index, stream);
234  return;
235  }
236  }
237 
238  RMM_CUDA_TRY(cudaMemcpyAsync(
239  element_ptr(element_index), &value, sizeof(value), cudaMemcpyDefault, stream.value()));
240  }
241 
242  // We delete the r-value reference overload to prevent asynchronously copying from a literal or
243  // implicit temporary value after it is deleted or goes out of scope.
244  void set_element_async(size_type, value_type const&&, cuda_stream_view) = delete;
245 
269  {
270  RMM_EXPECTS(
271  element_index < size(), "Attempt to access out of bounds element.", rmm::out_of_range);
272  RMM_CUDA_TRY(
273  cudaMemsetAsync(element_ptr(element_index), 0, sizeof(value_type), stream.value()));
274  }
275 
305  void set_element(size_type element_index, T const& value, cuda_stream_view stream)
306  {
307  set_element_async(element_index, value, stream);
308  stream.synchronize_no_throw();
309  }
310 
323  [[nodiscard]] value_type element(size_type element_index, cuda_stream_view stream) const
324  {
325  RMM_EXPECTS(
326  element_index < size(), "Attempt to access out of bounds element.", rmm::out_of_range);
327  value_type value;
328  RMM_CUDA_TRY(cudaMemcpyAsync(
329  &value, element_ptr(element_index), sizeof(value), cudaMemcpyDefault, stream.value()));
330  stream.synchronize();
331  return value;
332  }
333 
345  [[nodiscard]] value_type front_element(cuda_stream_view stream) const
346  {
347  return element(0, stream);
348  }
349 
361  [[nodiscard]] value_type back_element(cuda_stream_view stream) const
362  {
363  return element(size() - 1, stream);
364  }
365 
378  void reserve(size_type new_capacity, cuda_stream_view stream)
379  {
380  _storage.reserve(elements_to_bytes(new_capacity), stream);
381  }
382 
399  void resize(size_type new_size, cuda_stream_view stream)
400  {
401  _storage.resize(elements_to_bytes(new_size), stream);
402  }
403 
411  void shrink_to_fit(cuda_stream_view stream) { _storage.shrink_to_fit(stream); }
412 
418  device_buffer release() noexcept { return std::move(_storage); }
419 
426  [[nodiscard]] size_type capacity() const noexcept
427  {
428  return bytes_to_elements(_storage.capacity());
429  }
430 
439  [[nodiscard]] pointer data() noexcept { return static_cast<pointer>(_storage.data()); }
440 
449  [[nodiscard]] const_pointer data() const noexcept
450  {
451  return static_cast<const_pointer>(_storage.data());
452  }
453 
461  [[nodiscard]] iterator begin() noexcept { return data(); }
462 
470  [[nodiscard]] const_iterator cbegin() const noexcept { return data(); }
471 
479  [[nodiscard]] const_iterator begin() const noexcept { return cbegin(); }
480 
489  [[nodiscard]] iterator end() noexcept { return data() + size(); }
490 
499  [[nodiscard]] const_iterator cend() const noexcept { return data() + size(); }
500 
509  [[nodiscard]] const_iterator end() const noexcept { return cend(); }
510 
518  [[nodiscard]] reverse_iterator rbegin() noexcept { return reverse_iterator(end()); }
519 
527  [[nodiscard]] const_reverse_iterator crbegin() const noexcept
528  {
529  return const_reverse_iterator(cend());
530  }
531 
539  [[nodiscard]] const_reverse_iterator rbegin() const noexcept { return crbegin(); }
540 
549  [[nodiscard]] reverse_iterator rend() noexcept { return reverse_iterator(begin()); }
550 
560  [[nodiscard]] const_reverse_iterator crend() const noexcept
561  {
562  return const_reverse_iterator(begin());
563  }
564 
573  [[nodiscard]] const_reverse_iterator rend() const noexcept { return crend(); }
574 
578  [[nodiscard]] size_type size() const noexcept { return bytes_to_elements(_storage.size()); }
579 
583  [[nodiscard]] std::int64_t ssize() const noexcept
584  {
585  assert(size() < static_cast<size_type>(std::numeric_limits<int64_t>::max()) &&
586  "Size overflows signed integer");
587  return static_cast<int64_t>(size());
588  }
589 
593  [[nodiscard]] bool is_empty() const noexcept { return size() == 0; }
594 
599  [[nodiscard]] rmm::device_async_resource_ref memory_resource() const noexcept
600  {
601  return _storage.memory_resource();
602  }
603 
607  [[nodiscard]] cuda_stream_view stream() const noexcept { return _storage.stream(); }
608 
620  void set_stream(cuda_stream_view stream) noexcept { _storage.set_stream(stream); }
621 
622  private:
623  device_buffer _storage{};
624 
625  [[nodiscard]] size_type constexpr elements_to_bytes(size_type num_elements) const noexcept
626  {
627  return num_elements * sizeof(value_type);
628  }
629 
630  [[nodiscard]] size_type constexpr bytes_to_elements(size_type num_bytes) const noexcept
631  {
632  return num_bytes / sizeof(value_type);
633  }
634 };
635  // end of group
637 } // namespace RMM_NAMESPACE
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:39
void synchronize() const
Synchronize the viewed CUDA stream.
void synchronize_no_throw() const noexcept
Synchronize the viewed CUDA stream. Does not throw if there is an error.
cudaStream_t value() const noexcept
Get the wrapped stream.
RAII construct for device memory allocation.
Definition: device_buffer.hpp:81
An uninitialized vector of elements in device memory.
Definition: device_uvector.hpp:78
reverse_iterator rend() noexcept
Returns reverse_iterator to the element preceding the first element of the vector.
Definition: device_uvector.hpp:549
const_iterator cend() const noexcept
Returns a const_iterator to the element following the last element of the vector.
Definition: device_uvector.hpp:499
const_reverse_iterator crend() const noexcept
Returns a const_reverse_iterator to the element preceding the first element of the vector.
Definition: device_uvector.hpp:560
value_type * pointer
The type of the pointer returned by data()
Definition: device_uvector.hpp:88
thrust::reverse_iterator< const_iterator > const_reverse_iterator
The type of the iterator returned by crbegin()
Definition: device_uvector.hpp:95
void set_element_async(size_type element_index, value_type const &value, cuda_stream_view stream)
Performs an asynchronous copy of v to the specified element in device memory.
Definition: device_uvector.hpp:220
bool is_empty() const noexcept
true if the vector contains no elements, i.e. size() == 0
Definition: device_uvector.hpp:593
size_type size() const noexcept
The number of elements in the vector.
Definition: device_uvector.hpp:578
const_pointer data() const noexcept
Returns const pointer to underlying device storage.
Definition: device_uvector.hpp:449
void resize(size_type new_size, cuda_stream_view stream)
Resizes the vector to contain new_size elements.
Definition: device_uvector.hpp:399
void set_element(size_type element_index, T const &value, cuda_stream_view stream)
Performs a synchronous copy of v to the specified element in device memory.
Definition: device_uvector.hpp:305
thrust::reverse_iterator< iterator > reverse_iterator
The type of the iterator returned by rbegin()
Definition: device_uvector.hpp:93
reverse_iterator rbegin() noexcept
Returns a reverse_iterator to the last element.
Definition: device_uvector.hpp:518
pointer data() noexcept
Returns pointer to underlying device storage.
Definition: device_uvector.hpp:439
void shrink_to_fit(cuda_stream_view stream)
Forces deallocation of unused device memory.
Definition: device_uvector.hpp:411
iterator end() noexcept
Returns an iterator to the element following the last element of the vector.
Definition: device_uvector.hpp:489
std::size_t size_type
The type used for the size of the vector.
Definition: device_uvector.hpp:84
const_reverse_iterator crbegin() const noexcept
Returns a const_reverse_iterator to the last element.
Definition: device_uvector.hpp:527
size_type capacity() const noexcept
Returns the number of elements that can be held in currently allocated storage.
Definition: device_uvector.hpp:426
std::int64_t ssize() const noexcept
The signed number of elements in the vector.
Definition: device_uvector.hpp:583
T value_type
T; stored value type.
Definition: device_uvector.hpp:83
const_iterator cbegin() const noexcept
Returns a const_iterator to the first element.
Definition: device_uvector.hpp:470
value_type back_element(cuda_stream_view stream) const
Returns the last element.
Definition: device_uvector.hpp:361
const_pointer const_iterator
The type of the const iterator returned by cbegin()
Definition: device_uvector.hpp:91
device_buffer release() noexcept
Release ownership of device memory storage.
Definition: device_uvector.hpp:418
void set_element_to_zero_async(size_type element_index, cuda_stream_view stream)
Asynchronously sets the specified element to zero in device memory.
Definition: device_uvector.hpp:268
device_uvector(device_uvector &&) noexcept=default
Default move constructor.
pointer iterator
The type of the iterator returned by begin()
Definition: device_uvector.hpp:90
void reserve(size_type new_capacity, cuda_stream_view stream)
Increases the capacity of the vector to new_capacity elements.
Definition: device_uvector.hpp:378
value_type & reference
value_type&; reference type returned by operator[](size_type)
Definition: device_uvector.hpp:85
const_reverse_iterator rend() const noexcept
Returns const_reverse_iterator to the element preceding the first element of the vector.
Definition: device_uvector.hpp:573
const_iterator end() const noexcept
Returns an iterator to the element following the last element of the vector.
Definition: device_uvector.hpp:509
pointer element_ptr(size_type element_index) noexcept
Returns pointer to the specified element.
Definition: device_uvector.hpp:164
value_type front_element(cuda_stream_view stream) const
Returns the first element.
Definition: device_uvector.hpp:345
value_type const * const_pointer
The type of the pointer returned by data() const.
Definition: device_uvector.hpp:89
device_uvector(device_uvector const &other, cuda_stream_view stream, device_async_resource_ref mr=mr::get_current_device_resource_ref())
Construct a new device_uvector by deep copying the contents of another device_uvector.
Definition: device_uvector.hpp:149
value_type const & const_reference
Definition: device_uvector.hpp:87
const_reverse_iterator rbegin() const noexcept
Returns a const_reverse_iterator to the last element.
Definition: device_uvector.hpp:539
value_type element(size_type element_index, cuda_stream_view stream) const
Returns the specified element from device memory.
Definition: device_uvector.hpp:323
const_pointer element_ptr(size_type element_index) const noexcept
Returns pointer to the specified element.
Definition: device_uvector.hpp:178
const_iterator begin() const noexcept
Returns a const_iterator to the first element.
Definition: device_uvector.hpp:479
iterator begin() noexcept
Returns an iterator to the first element.
Definition: device_uvector.hpp:461
Exception thrown when attempting to access outside of a defined range.
Definition: error.hpp:110
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:40
device_async_resource_ref get_current_device_resource_ref()
Get the device_async_resource_ref for the current device.
Definition: per_device_resource.hpp:411
Management of per-device device_memory_resources.