aligned_resource_adaptor.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
7 #include <rmm/aligned.hpp>
9 #include <rmm/detail/error.hpp>
10 #include <rmm/detail/export.hpp>
13 #include <rmm/resource_ref.hpp>
14 
15 #include <algorithm>
16 #include <cstddef>
17 #include <memory>
18 #include <mutex>
19 #include <unordered_map>
20 
21 namespace RMM_NAMESPACE {
22 namespace mr {
51 template <typename Upstream>
53  public:
66  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT,
67  std::size_t alignment_threshold = default_alignment_threshold)
68  : upstream_{upstream},
69  alignment_{std::max(alignment, rmm::CUDA_ALLOCATION_ALIGNMENT)},
70  alignment_threshold_{alignment_threshold}
71  {
72  RMM_EXPECTS(rmm::is_supported_alignment(alignment),
73  "Allocation alignment is not a power of 2.");
74  }
75 
88  explicit aligned_resource_adaptor(Upstream* upstream,
89  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT,
90  std::size_t alignment_threshold = default_alignment_threshold)
91  : upstream_{to_device_async_resource_ref_checked(upstream)},
92  alignment_{std::max(alignment, rmm::CUDA_ALLOCATION_ALIGNMENT)},
93  alignment_threshold_{alignment_threshold}
94  {
95  RMM_EXPECTS(rmm::is_supported_alignment(alignment),
96  "Allocation alignment is not a power of 2.");
97  }
98 
// Default construction is deleted: an adaptor must be built with constructor arguments.
99  aligned_resource_adaptor() = delete;
// Defaulted destructor, declared `override` against the base class.
100  ~aligned_resource_adaptor() override = default;
// Copy-assignment and move-assignment are both disabled.
103  aligned_resource_adaptor& operator=(aligned_resource_adaptor const&) = delete;
104  aligned_resource_adaptor& operator=(aligned_resource_adaptor&&) = delete;
105 
110  {
111  return upstream_;
112  }
113 
// Default value for the constructors' `alignment_threshold` argument. With 0, the
// `bytes < alignment_threshold_` test in do_allocate/do_deallocate is never true, so the
// threshold never causes a request to bypass the alignment handling.
117  static constexpr std::size_t default_alignment_threshold = 0;
118 
119  private:
// Scoped lock type used around every access to `pointers_` (held on `mtx_`).
120  using lock_guard = std::lock_guard<std::mutex>;
121 
133  void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
134  {
135  if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) {
136  return get_upstream_resource().allocate(stream, bytes, 1);
137  }
138  auto const size = upstream_allocation_size(bytes);
139  void* pointer = get_upstream_resource().allocate(stream, size, 1);
140  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
141  auto const address = reinterpret_cast<std::size_t>(pointer);
142  auto const aligned_address = rmm::align_up(address, alignment_);
143  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr)
144  void* aligned_pointer = reinterpret_cast<void*>(aligned_address);
145  if (pointer != aligned_pointer) {
146  lock_guard lock(mtx_);
147  pointers_.emplace(aligned_pointer, pointer);
148  }
149  return aligned_pointer;
150  }
151 
159  void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
160  {
161  if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) {
162  get_upstream_resource().deallocate(stream, ptr, bytes, 1);
163  } else {
164  {
165  lock_guard lock(mtx_);
166  auto const iter = pointers_.find(ptr);
167  if (iter != pointers_.end()) {
168  ptr = iter->second;
169  pointers_.erase(iter);
170  }
171  }
172  get_upstream_resource().deallocate(stream, ptr, upstream_allocation_size(bytes), 1);
173  }
174  }
175 
183  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
184  {
185  if (this == std::addressof(other)) { return true; }
186  auto cast = dynamic_cast<aligned_resource_adaptor<Upstream> const*>(&other);
187  if (cast == nullptr) { return false; }
188  return get_upstream_resource() == cast->get_upstream_resource() &&
189  alignment_ == cast->alignment_ && alignment_threshold_ == cast->alignment_threshold_;
190  }
191 
211  std::size_t upstream_allocation_size(std::size_t bytes) const
212  {
213  auto const aligned_size = rmm::align_up(bytes, alignment_);
214  // aligned_size: bytes of properly aligned space needed
215  // (alignment_ - rmm::CUDA_ALLOCATION_ALIGNMENT): maximum "waste" due to pointer misalignment
216  return aligned_size + (alignment_ - rmm::CUDA_ALLOCATION_ALIGNMENT);
217  }
218 
// Upstream resource that every allocation and deallocation is forwarded to.
220  device_async_resource_ref upstream_;
// Maps an aligned pointer returned to the caller to the pointer the upstream resource
// returned; an entry exists only when the two differ.
221  std::unordered_map<void*, void*> pointers_;
// Alignment in effect: the larger of the constructor's `alignment` argument and
// rmm::CUDA_ALLOCATION_ALIGNMENT.
222  std::size_t alignment_;
// Requests of fewer bytes than this are forwarded upstream without alignment handling.
223  std::size_t alignment_threshold_;
// Guards `pointers_`.
224  mutable std::mutex mtx_;
225 };
226  // end of group
228 } // namespace mr
229 } // namespace RMM_NAMESPACE
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:28
Resource that adapts an upstream memory resource to allocate memory with a specified alignment.
Definition: aligned_resource_adaptor.hpp:52
aligned_resource_adaptor(device_async_resource_ref upstream, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold=default_alignment_threshold)
Construct an aligned resource adaptor using upstream to satisfy allocation requests.
Definition: aligned_resource_adaptor.hpp:65
aligned_resource_adaptor(Upstream *upstream, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold=default_alignment_threshold)
Construct an aligned resource adaptor using upstream to satisfy allocation requests.
Definition: aligned_resource_adaptor.hpp:88
rmm::device_async_resource_ref get_upstream_resource() const noexcept
rmm::device_async_resource_ref to the upstream resource
Definition: aligned_resource_adaptor.hpp:109
Base class for all librmm device memory allocation.
Definition: device_memory_resource.hpp:83
device_async_resource_ref to_device_async_resource_ref_checked(Resource *res)
Convert pointer to memory resource into device_async_resource_ref, checking for nullptr
Definition: resource_ref.hpp:72
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:32
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
Default alignment used for CUDA memory allocation.
Definition: aligned.hpp:25
bool is_supported_alignment(std::size_t alignment) noexcept
Returns whether or not alignment is a valid memory alignment.
std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
Align up to nearest multiple of specified power of 2.
RAPIDS Memory Manager - The top-level namespace for all RMM functionality.
Management of per-device device_memory_resources.