aligned_resource_adaptor.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <rmm/aligned.hpp>
19 #include <rmm/cuda_stream_view.hpp>
20 #include <rmm/detail/error.hpp>
21 #include <rmm/detail/export.hpp>
24 #include <rmm/resource_ref.hpp>
25 
26 #include <algorithm>
27 #include <cstddef>
28 #include <mutex>
29 #include <unordered_map>
30 
31 namespace RMM_NAMESPACE {
32 namespace mr {
61 template <typename Upstream>
63  public:
76  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT,
77  std::size_t alignment_threshold = default_alignment_threshold)
78  : upstream_{upstream},
79  alignment_{std::max(alignment, rmm::CUDA_ALLOCATION_ALIGNMENT)},
80  alignment_threshold_{alignment_threshold}
81  {
82  RMM_EXPECTS(rmm::is_supported_alignment(alignment),
83  "Allocation alignment is not a power of 2.");
84  }
85 
98  explicit aligned_resource_adaptor(Upstream* upstream,
99  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT,
100  std::size_t alignment_threshold = default_alignment_threshold)
101  : upstream_{to_device_async_resource_ref_checked(upstream)},
102  alignment_{std::max(alignment, rmm::CUDA_ALLOCATION_ALIGNMENT)},
103  alignment_threshold_{alignment_threshold}
104  {
105  RMM_EXPECTS(rmm::is_supported_alignment(alignment),
106  "Allocation alignment is not a power of 2.");
107  }
108 
109  aligned_resource_adaptor() = delete;
110  ~aligned_resource_adaptor() override = default;
113  aligned_resource_adaptor& operator=(aligned_resource_adaptor const&) = delete;
114  aligned_resource_adaptor& operator=(aligned_resource_adaptor&&) = delete;
115 
120  {
121  return upstream_;
122  }
123 
127  static constexpr std::size_t default_alignment_threshold = 0;
128 
129  private:
130  using lock_guard = std::lock_guard<std::mutex>;
131 
143  void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
144  {
145  if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) {
146  return get_upstream_resource().allocate_async(bytes, 1, stream);
147  }
148  auto const size = upstream_allocation_size(bytes);
149  void* pointer = get_upstream_resource().allocate_async(size, 1, stream);
150  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
151  auto const address = reinterpret_cast<std::size_t>(pointer);
152  auto const aligned_address = rmm::align_up(address, alignment_);
153  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr)
154  void* aligned_pointer = reinterpret_cast<void*>(aligned_address);
155  if (pointer != aligned_pointer) {
156  lock_guard lock(mtx_);
157  pointers_.emplace(aligned_pointer, pointer);
158  }
159  return aligned_pointer;
160  }
161 
169  void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override
170  {
171  if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) {
172  get_upstream_resource().deallocate_async(ptr, bytes, 1, stream);
173  } else {
174  {
175  lock_guard lock(mtx_);
176  auto const iter = pointers_.find(ptr);
177  if (iter != pointers_.end()) {
178  ptr = iter->second;
179  pointers_.erase(iter);
180  }
181  }
182  get_upstream_resource().deallocate_async(ptr, upstream_allocation_size(bytes), 1, stream);
183  }
184  }
185 
193  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
194  {
195  if (this == &other) { return true; }
196  auto cast = dynamic_cast<aligned_resource_adaptor<Upstream> const*>(&other);
197  if (cast == nullptr) { return false; }
198  return get_upstream_resource() == cast->get_upstream_resource() &&
199  alignment_ == cast->alignment_ && alignment_threshold_ == cast->alignment_threshold_;
200  }
201 
221  std::size_t upstream_allocation_size(std::size_t bytes) const
222  {
223  auto const aligned_size = rmm::align_up(bytes, alignment_);
224  // aligned_size: bytes of properly aligned space needed
225  // (alignment_ - rmm::CUDA_ALLOCATION_ALIGNMENT): maximum "waste" due to pointer misalignment
226  return aligned_size + (alignment_ - rmm::CUDA_ALLOCATION_ALIGNMENT);
227  }
228 
230  device_async_resource_ref upstream_;
231  std::unordered_map<void*, void*> pointers_;
232  std::size_t alignment_;
233  std::size_t alignment_threshold_;
234  mutable std::mutex mtx_;
235 };
236  // end of group
238 } // namespace mr
239 } // namespace RMM_NAMESPACE
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:39
Resource that adapts Upstream memory resource to allocate memory in a specified alignment size.
Definition: aligned_resource_adaptor.hpp:62
aligned_resource_adaptor(device_async_resource_ref upstream, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold=default_alignment_threshold)
Construct an aligned resource adaptor using upstream to satisfy allocation requests.
Definition: aligned_resource_adaptor.hpp:75
aligned_resource_adaptor(Upstream *upstream, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold=default_alignment_threshold)
Construct an aligned resource adaptor using upstream to satisfy allocation requests.
Definition: aligned_resource_adaptor.hpp:98
rmm::device_async_resource_ref get_upstream_resource() const noexcept
Returns an rmm::device_async_resource_ref to the upstream resource.
Definition: aligned_resource_adaptor.hpp:119
Base class for all librmm device memory allocation.
Definition: device_memory_resource.hpp:92
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:40
device_async_resource_ref to_device_async_resource_ref_checked(Resource *res)
Convert pointer to memory resource into device_async_resource_ref, checking for nullptr
Definition: resource_ref.hpp:78
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
Default alignment used for CUDA memory allocation.
Definition: aligned.hpp:43
bool is_supported_alignment(std::size_t alignment) noexcept
Returns whether or not alignment is a valid memory alignment.
std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
Align up to nearest multiple of specified power of 2.
Management of per-device device_memory_resources.