sam_headroom_memory_resource.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
7 #include <rmm/cuda_device.hpp>
9 #include <rmm/detail/export.hpp>
12 #include <rmm/resource_ref.hpp>
13 
14 #include <algorithm>
15 #include <cstddef>
16 #include <memory>
17 
18 namespace RMM_NAMESPACE {
19 namespace mr {
39  public:
45  explicit sam_headroom_memory_resource(std::size_t headroom) : system_mr_{}, headroom_{headroom} {}
46 
48  ~sam_headroom_memory_resource() override = default;
53 
54  private:
66  void* do_allocate(std::size_t bytes, [[maybe_unused]] cuda_stream_view stream) override
67  {
68  void* pointer = system_mr_.allocate(stream, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT);
69 
70  auto const free = rmm::available_device_memory().first;
71  auto const allocatable = free > headroom_ ? free - headroom_ : 0UL;
72  auto const gpu_portion =
73  rmm::align_down(std::min(allocatable, bytes), rmm::CUDA_ALLOCATION_ALIGNMENT);
74  auto const cpu_portion = bytes - gpu_portion;
75 
76  if (gpu_portion != 0) {
77 #if defined(CUDART_VERSION) && CUDART_VERSION >= 13000
78  cudaMemLocation location{cudaMemLocationTypeDevice, rmm::get_current_cuda_device().value()};
79  RMM_CUDA_TRY(
80  cudaMemAdvise(pointer, gpu_portion, cudaMemAdviseSetPreferredLocation, location));
81 #else
82  RMM_CUDA_TRY(cudaMemAdvise(pointer,
83  gpu_portion,
84  cudaMemAdviseSetPreferredLocation,
85  rmm::get_current_cuda_device().value()));
86 #endif
87  }
88  if (cpu_portion != 0) {
89 #if defined(CUDART_VERSION) && CUDART_VERSION >= 13000
90  cudaMemLocation location{cudaMemLocationTypeHost, 0};
91  RMM_CUDA_TRY(cudaMemAdvise(static_cast<char*>(pointer) + gpu_portion,
92  cpu_portion,
93  cudaMemAdviseSetPreferredLocation,
94  location));
95 #else
96  RMM_CUDA_TRY(cudaMemAdvise(static_cast<char*>(pointer) + gpu_portion,
97  cpu_portion,
98  cudaMemAdviseSetPreferredLocation,
99  cudaCpuDeviceId));
100 #endif
101  }
102 
103  return pointer;
104  }
105 
115  void do_deallocate(void* ptr,
116  [[maybe_unused]] std::size_t bytes,
117  [[maybe_unused]] cuda_stream_view stream) noexcept override
118  {
119  system_mr_.deallocate(stream, ptr, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT);
120  }
121 
129  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
130  {
131  if (this == std::addressof(other)) { return true; }
132  auto cast = dynamic_cast<sam_headroom_memory_resource const*>(&other);
133  if (cast == nullptr) { return false; }
134  return headroom_ == cast->headroom_;
135  }
136 
138  system_memory_resource system_mr_;
140  std::size_t headroom_;
141 }; // end of group
143 } // namespace mr
144 } // namespace RMM_NAMESPACE
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:28
Base class for all librmm device memory allocation.
Definition: device_memory_resource.hpp:83
void * allocate(cuda_stream_view stream, std::size_t bytes, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT)
Allocates memory of size at least bytes on the specified stream.
Definition: device_memory_resource.hpp:153
Resource that uses system memory resource to allocate memory with a headroom.
Definition: sam_headroom_memory_resource.hpp:38
sam_headroom_memory_resource(std::size_t headroom)
Construct a headroom memory resource.
Definition: sam_headroom_memory_resource.hpp:45
std::pair< std::size_t, std::size_t > available_device_memory()
Returns the available and total device memory in bytes for the current device.
cuda_device_id get_current_cuda_device()
Returns a cuda_device_id for the current device.
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
Default alignment used for CUDA memory allocation.
Definition: aligned.hpp:25
std::size_t align_down(std::size_t value, std::size_t alignment) noexcept
Align down to the nearest multiple of specified power of 2.
constexpr value_type value() const noexcept
The wrapped integer value.
Definition: cuda_device.hpp:43