sam_headroom_memory_resource.hpp
/*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

#include <rmm/aligned.hpp>
#include <rmm/cuda_device.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/detail/export.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/system_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <algorithm>
#include <cstddef>

namespace RMM_NAMESPACE {
namespace mr {
/**
 * @brief Resource that uses the system memory resource to allocate memory with a headroom.
 *
 * Only the portion of each allocation that fits within the currently free GPU memory minus the
 * headroom is advised to prefer GPU residency; the remainder is advised to prefer host memory.
 */
class sam_headroom_memory_resource : public device_memory_resource {
 public:
  /**
   * @brief Construct a headroom memory resource.
   *
   * @param headroom Size in bytes of GPU memory reserved as headroom.
   */
  explicit sam_headroom_memory_resource(std::size_t headroom) : system_mr_{}, headroom_{headroom} {}

  ~sam_headroom_memory_resource() override = default;

 private:
  /**
   * @brief Allocates memory of size at least `bytes` using the system memory resource.
   *
   * The portion of the allocation that fits within free GPU memory minus the headroom is advised
   * to prefer GPU residency; the remainder is advised to prefer host memory.
   *
   * @param bytes The size in bytes of the allocation
   * @param stream The stream on which to perform the allocation (forwarded to the system memory
   * resource)
   * @return Pointer to the newly allocated memory
   */
  void* do_allocate(std::size_t bytes, [[maybe_unused]] cuda_stream_view stream) override
  {
    void* pointer = system_mr_.allocate(stream, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT);

    // Determine how much of this allocation can reside on the GPU without consuming the headroom.
    auto const free        = rmm::available_device_memory().first;
    auto const allocatable = free > headroom_ ? free - headroom_ : 0UL;
    auto const gpu_portion =
      rmm::align_down(std::min(allocatable, bytes), rmm::CUDA_ALLOCATION_ALIGNMENT);
    auto const cpu_portion = bytes - gpu_portion;

    if (gpu_portion != 0) {
      // Advise the driver that the GPU portion should preferably reside on the current device.
#if defined(CUDART_VERSION) && CUDART_VERSION >= 13000
      // CUDA 13 and newer express the preferred location as a cudaMemLocation.
      cudaMemLocation location{cudaMemLocationTypeDevice, rmm::get_current_cuda_device().value()};
      RMM_CUDA_TRY(
        cudaMemAdvise(pointer, gpu_portion, cudaMemAdviseSetPreferredLocation, location));
#else
      RMM_CUDA_TRY(cudaMemAdvise(pointer,
                                 gpu_portion,
                                 cudaMemAdviseSetPreferredLocation,
                                 rmm::get_current_cuda_device().value()));
#endif
    }
    if (cpu_portion != 0) {
      // Advise the driver that the remainder should preferably reside in host memory.
#if defined(CUDART_VERSION) && CUDART_VERSION >= 13000
      cudaMemLocation location{cudaMemLocationTypeHost, 0};
      RMM_CUDA_TRY(cudaMemAdvise(static_cast<char*>(pointer) + gpu_portion,
                                 cpu_portion,
                                 cudaMemAdviseSetPreferredLocation,
                                 location));
#else
      RMM_CUDA_TRY(cudaMemAdvise(static_cast<char*>(pointer) + gpu_portion,
                                 cpu_portion,
                                 cudaMemAdviseSetPreferredLocation,
                                 cudaCpuDeviceId));
#endif
    }

    return pointer;
  }

  /**
   * @brief Deallocates memory pointed to by `ptr` using the system memory resource.
   */
  void do_deallocate(void* ptr,
                     [[maybe_unused]] std::size_t bytes,
                     [[maybe_unused]] cuda_stream_view stream) noexcept override
  {
    system_mr_.deallocate(stream, ptr, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT);
  }

  /**
   * @brief Compares this resource to another; two headroom resources are equal when their
   * headroom sizes are equal.
   */
  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
    if (this == &other) { return true; }
    auto cast = dynamic_cast<sam_headroom_memory_resource const*>(&other);
    if (cast == nullptr) { return false; }
    return headroom_ == cast->headroom_;
  }

  system_memory_resource system_mr_;  ///< The system memory resource used for allocation
  std::size_t headroom_;              ///< Size in bytes of GPU memory reserved as headroom
};
/** @} */  // end of group
}  // namespace mr
}  // namespace RMM_NAMESPACE
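A minimal usage sketch (not part of this header), assuming the header is installed at rmm/mr/device/sam_headroom_memory_resource.hpp, that rmm::mr::set_current_device_resource from rmm/mr/device/per_device_resource.hpp is available, and that the system supports GPU access to system-allocated memory (HMM or ATS): construct the resource with the desired headroom and make it the current device resource.

#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/sam_headroom_memory_resource.hpp>

#include <cstddef>

int main()
{
  // Reserve 2 GiB of GPU memory as headroom for allocations that cannot be migrated.
  rmm::mr::sam_headroom_memory_resource headroom_mr{std::size_t{2} << 30};

  // Route default device allocations on the current device through this resource.
  rmm::mr::set_current_device_resource(&headroom_mr);

  // Allocate and free 1 MiB; the GPU-resident portion of each allocation is capped by the
  // free device memory minus the 2 GiB headroom, and the rest prefers host memory.
  void* ptr = headroom_mr.allocate(std::size_t{1} << 20);
  headroom_mr.deallocate(ptr, std::size_t{1} << 20);
  return 0;
}

The resource must outlive any memory it allocates and any consumer of the current device resource, so in practice it is created near the top of main and kept alive for the program's lifetime.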