sam_headroom_memory_resource.hpp
/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

#include <rmm/aligned.hpp>
#include <rmm/cuda_device.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/detail/export.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/system_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <algorithm>
#include <cstddef>

namespace RMM_NAMESPACE {
namespace mr {
/**
 * @brief Resource that uses the system memory resource to allocate memory with a headroom.
 *
 * Only the portion of each allocation that fits within the currently free GPU memory minus the
 * headroom is advised to prefer GPU residency; the remainder is advised to prefer host memory.
 */
class sam_headroom_memory_resource : public device_memory_resource {
 public:
  /**
   * @brief Construct a headroom memory resource.
   *
   * @param headroom Size in bytes of GPU memory reserved as headroom.
   */
  explicit sam_headroom_memory_resource(std::size_t headroom) : system_mr_{}, headroom_{headroom} {}

  ~sam_headroom_memory_resource() override = default;

 private:
  /**
   * @brief Allocates memory of size at least `bytes` using the system memory resource.
   *
   * The portion of the allocation that fits within free GPU memory minus the headroom is advised
   * to prefer GPU residency; the remainder is advised to prefer host memory.
   *
   * @param bytes The size in bytes of the allocation
   * @param stream The stream on which to perform the allocation (forwarded to the system memory
   * resource)
   * @return Pointer to the newly allocated memory
   */
  void* do_allocate(std::size_t bytes, [[maybe_unused]] cuda_stream_view stream) override
  {
    void* pointer = system_mr_.allocate(stream, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT);

    // Determine how much of this allocation can reside on the GPU without consuming the headroom.
    auto const free        = rmm::available_device_memory().first;
    auto const allocatable = free > headroom_ ? free - headroom_ : 0UL;
    auto const gpu_portion =
      rmm::align_down(std::min(allocatable, bytes), rmm::CUDA_ALLOCATION_ALIGNMENT);
    auto const cpu_portion = bytes - gpu_portion;

    if (gpu_portion != 0) {
      // Advise the driver that the GPU portion should preferably reside on the current device.
#if defined(CUDART_VERSION) && CUDART_VERSION >= 13000
      // CUDA 13 and newer express the preferred location as a cudaMemLocation.
      cudaMemLocation location{cudaMemLocationTypeDevice, rmm::get_current_cuda_device().value()};
      RMM_CUDA_TRY(
        cudaMemAdvise(pointer, gpu_portion, cudaMemAdviseSetPreferredLocation, location));
#else
      RMM_CUDA_TRY(cudaMemAdvise(pointer,
                                 gpu_portion,
                                 cudaMemAdviseSetPreferredLocation,
                                 rmm::get_current_cuda_device().value()));
#endif
    }
    if (cpu_portion != 0) {
      // Advise the driver that the remainder should preferably reside in host memory.
#if defined(CUDART_VERSION) && CUDART_VERSION >= 13000
      cudaMemLocation location{cudaMemLocationTypeHost, 0};
      RMM_CUDA_TRY(cudaMemAdvise(static_cast<char*>(pointer) + gpu_portion,
                                 cpu_portion,
                                 cudaMemAdviseSetPreferredLocation,
                                 location));
#else
      RMM_CUDA_TRY(cudaMemAdvise(static_cast<char*>(pointer) + gpu_portion,
                                 cpu_portion,
                                 cudaMemAdviseSetPreferredLocation,
                                 cudaCpuDeviceId));
#endif
    }

    return pointer;
  }

  /**
   * @brief Deallocates memory pointed to by `ptr` using the system memory resource.
   */
  void do_deallocate(void* ptr,
                     [[maybe_unused]] std::size_t bytes,
                     [[maybe_unused]] cuda_stream_view stream) noexcept override
  {
    system_mr_.deallocate(stream, ptr, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT);
  }

  /**
   * @brief Compares this resource to another; two headroom resources are equal when their
   * headroom sizes are equal.
   */
  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
    if (this == &other) { return true; }
    auto cast = dynamic_cast<sam_headroom_memory_resource const*>(&other);
    if (cast == nullptr) { return false; }
    return headroom_ == cast->headroom_;
  }

  system_memory_resource system_mr_;  ///< The system memory resource used for allocation
  std::size_t headroom_;              ///< Size in bytes of GPU memory reserved as headroom
};
/** @} */  // end of group
}  // namespace mr
}  // namespace RMM_NAMESPACE
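A minimal usage sketch (not part of this header), assuming the header is installed at rmm/mr/device/sam_headroom_memory_resource.hpp, that rmm::mr::set_current_device_resource from rmm/mr/device/per_device_resource.hpp is available, and that the system supports GPU access to system-allocated memory (HMM or ATS): construct the resource with the desired headroom and make it the current device resource.

#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/sam_headroom_memory_resource.hpp>

#include <cstddef>

int main()
{
  // Reserve 2 GiB of GPU memory as headroom for allocations that cannot be migrated.
  rmm::mr::sam_headroom_memory_resource headroom_mr{std::size_t{2} << 30};

  // Route default device allocations on the current device through this resource.
  rmm::mr::set_current_device_resource(&headroom_mr);

  // Allocate and free 1 MiB; the GPU-resident portion of each allocation is capped by the
  // free device memory minus the 2 GiB headroom, and the rest prefers host memory.
  void* ptr = headroom_mr.allocate(std::size_t{1} << 20);
  headroom_mr.deallocate(ptr, std::size_t{1} << 20);
  return 0;
}

The resource must outlive any memory it allocates and any consumer of the current device resource, so in practice it is created near the top of main and kept alive for the program's lifetime.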