librmm/25.04/sam__headroom__memory__resource_8hpp_source

 /*

  * Copyright (c) 2024-2025, NVIDIA CORPORATION.

  *

  * Licensed under the Apache License, Version 2.0 (the "License");

  * you may not use this file except in compliance with the License.

  * You may obtain a copy of the License at

  *

  *     http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing, software

  * distributed under the License is distributed on an "AS IS" BASIS,

  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  * See the License for the specific language governing permissions and

  * limitations under the License.

  */

 #pragma once


 #include <rmm/cuda_device.hpp>

 #include <rmm/cuda_stream_view.hpp>

 #include <rmm/detail/export.hpp>

 #include <rmm/mr/device/device_memory_resource.hpp>

 #include <rmm/mr/device/system_memory_resource.hpp>

 #include <rmm/resource_ref.hpp>


 #include <algorithm>

 #include <cstddef>


 namespace RMM_NAMESPACE {

 namespace mr {

 /**
  * @brief Resource that uses the system memory resource to allocate memory with a headroom.
  *
  * Every allocation is served by an internal `system_memory_resource`. After allocating, the
  * resource queries the free memory of the current device and issues
  * `cudaMemAdviseSetPreferredLocation` advice: the leading part of the allocation that fits
  * in (free device memory - headroom) is advised towards the current device, and the
  * remainder is advised towards the CPU.
  */
 class sam_headroom_memory_resource final : public device_memory_resource {
  public:
   /**
    * @brief Construct a headroom memory resource.
    *
    * @param headroom Number of bytes of free device memory to leave out of the portion that
    * is advised towards the device
    */
   explicit sam_headroom_memory_resource(std::size_t headroom) : system_mr_{}, headroom_{headroom} {}

   sam_headroom_memory_resource()                                               = delete;
   ~sam_headroom_memory_resource() override                                     = default;
   sam_headroom_memory_resource(sam_headroom_memory_resource const&)            = delete;
   sam_headroom_memory_resource(sam_headroom_memory_resource&&)                 = delete;
   sam_headroom_memory_resource& operator=(sam_headroom_memory_resource const&) = delete;
   sam_headroom_memory_resource& operator=(sam_headroom_memory_resource&&)      = delete;

  private:
   /**
    * @brief Allocates `bytes` from the system memory resource and advises where each part of
    * the allocation should preferably reside.
    *
    * If querying the device memory or issuing the advice fails, the allocation is returned
    * to the system memory resource before the exception is propagated.
    *
    * @param bytes The size, in bytes, of the allocation
    * @param stream Stream forwarded to the system memory resource
    * @return Pointer to the newly allocated memory
    */
   void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
   {
     void* pointer = system_mr_.allocate_async(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, stream);

     try {
       auto const free = rmm::available_device_memory().first;
       // Free device memory that may be used without eating into the headroom.
       auto const allocatable = free > headroom_ ? free - headroom_ : std::size_t{0};
       // Keep the boundary between the two advised ranges on an allocation-alignment multiple.
       auto const gpu_portion =
         rmm::align_down(std::min(allocatable, bytes), rmm::CUDA_ALLOCATION_ALIGNMENT);
       auto const cpu_portion = bytes - gpu_portion;

       if (gpu_portion != 0) {
         RMM_CUDA_TRY(cudaMemAdvise(pointer,
                                    gpu_portion,
                                    cudaMemAdviseSetPreferredLocation,
                                    rmm::get_current_cuda_device().value()));
       }
       if (cpu_portion != 0) {
         RMM_CUDA_TRY(cudaMemAdvise(static_cast<char*>(pointer) + gpu_portion,
                                    cpu_portion,
                                    cudaMemAdviseSetPreferredLocation,
                                    cudaCpuDeviceId));
       }
     } catch (...) {
       // Do not leak the allocation when the memory query or the advice fails.
       system_mr_.deallocate_async(pointer, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
       throw;
     }

     return pointer;
   }

   /**
    * @brief Returns the memory pointed to by `ptr` to the system memory resource.
    *
    * The size and alignment passed on mirror the ones used in `do_allocate`.
    *
    * @param ptr Pointer to be deallocated
    * @param bytes The size, in bytes, of the allocation
    * @param stream Stream forwarded to the system memory resource
    */
   void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override
   {
     system_mr_.deallocate_async(ptr, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
   }

   /**
    * @brief Compares this resource to another.
    *
    * @param other The other resource to compare to
    * @return true if `other` is this object, or is a `sam_headroom_memory_resource` with the
    * same headroom; false otherwise
    */
   [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
   {
     if (this == &other) { return true; }
     auto const* cast = dynamic_cast<sam_headroom_memory_resource const*>(&other);
     if (cast == nullptr) { return false; }
     return headroom_ == cast->headroom_;
   }

   system_memory_resource system_mr_;  // resource that performs the actual allocations
   std::size_t headroom_;              // bytes of free device memory to keep unadvised
 };

 }  // namespace mr

 }  // namespace RMM_NAMESPACE

rmm::cuda_stream_view
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:39

rmm::mr::device_memory_resource
Base class for all librmm device memory allocation.
Definition: device_memory_resource.hpp:93

rmm::mr::device_memory_resource::allocate_async
void * allocate_async(std::size_t bytes, std::size_t alignment, cuda_stream_view stream)
Allocates memory of size at least bytes.
Definition: device_memory_resource.hpp:216

rmm::mr::sam_headroom_memory_resource
Resource that uses system memory resource to allocate memory with a headroom.
Definition: sam_headroom_memory_resource.hpp:48

rmm::mr::sam_headroom_memory_resource::sam_headroom_memory_resource
sam_headroom_memory_resource(std::size_t headroom)
Construct a headroom memory resource.
Definition: sam_headroom_memory_resource.hpp:55

cuda_device.hpp

cuda_stream_view.hpp

device_memory_resource.hpp

rmm::available_device_memory
std::pair< std::size_t, std::size_t > available_device_memory()
Returns the available and total device memory in bytes for the current device.
Definition: cuda_device.hpp:123

rmm::get_current_cuda_device
cuda_device_id get_current_cuda_device()
Returns a cuda_device_id for the current device.
Definition: cuda_device.hpp:99

rmm::CUDA_ALLOCATION_ALIGNMENT
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
Default alignment used for CUDA memory allocation.
Definition: aligned.hpp:43

rmm::align_down
constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept
Align down to the nearest multiple of specified power of 2.
Definition: aligned.hpp:91

resource_ref.hpp

system_memory_resource.hpp