system_memory_resource.hpp
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
7 #include <rmm/cuda_device.hpp>
9 #include <rmm/detail/error.hpp>
10 #include <rmm/detail/export.hpp>
11 #include <rmm/detail/format.hpp>
13 
14 #include <cstddef>
15 #include <string>
16 
17 namespace RMM_NAMESPACE {
18 namespace mr {
19 
20 namespace detail {
27 static bool is_system_memory_supported(cuda_device_id device_id)
28 {
29  // Check if pageable memory access is supported
30  int pageableMemoryAccess;
31  RMM_CUDA_TRY(cudaDeviceGetAttribute(
32  &pageableMemoryAccess, cudaDevAttrPageableMemoryAccess, device_id.value()));
33  return pageableMemoryAccess == 1;
34 }
35 } // namespace detail
36 
64 class system_memory_resource : public device_memory_resource {
65  public:
66  system_memory_resource()
67  {
68  RMM_EXPECTS(rmm::mr::detail::is_system_memory_supported(rmm::get_current_cuda_device()),
69  "System memory allocator is not supported with this hardware/software version.");
70  }
71  ~system_memory_resource() override = default;
72  system_memory_resource(system_memory_resource const&) = default;
73  system_memory_resource(system_memory_resource&&) = default;
74  system_memory_resource& operator=(system_memory_resource const&) =
75  default;
76  system_memory_resource& operator=(system_memory_resource&&) =
77  default;
78 
79  private:
91  void* do_allocate(std::size_t bytes, [[maybe_unused]] cuda_stream_view stream) override
92  {
93  try {
94  return rmm::detail::aligned_host_allocate(
95  bytes, CUDA_ALLOCATION_ALIGNMENT, [](std::size_t size) { return ::operator new(size); });
96  } catch (std::bad_alloc const& e) {
97  auto const msg = std::string("Failed to allocate ") + rmm::detail::format_bytes(bytes) +
98  std::string("of memory: ") + e.what();
99  RMM_FAIL(msg.c_str(), rmm::out_of_memory);
100  }
101  }
102 
113  void do_deallocate(void* ptr,
114  [[maybe_unused]] std::size_t bytes,
115  cuda_stream_view stream) noexcept override
116  {
117  // With `cudaFree`, the CUDA runtime keeps track of dependent operations and does implicit
118  // synchronization. However, with SAM, since `free` is immediate, we need to wait for in-flight
119  // CUDA operations to finish before freeing the memory, to avoid potential use-after-free errors
120  // or race conditions.
121  stream.synchronize();
122 
123  rmm::detail::aligned_host_deallocate(
124  ptr, bytes, CUDA_ALLOCATION_ALIGNMENT, [](void* ptr) { ::operator delete(ptr); });
125  }
126 
137  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
138  {
139  return dynamic_cast<system_memory_resource const*>(&other) != nullptr;
140  }
146  friend void get_property(system_memory_resource const&, cuda::mr::device_accessible) noexcept {}
147 
153  friend void get_property(system_memory_resource const&, cuda::mr::host_accessible) noexcept {}
154 };
155 
156 // static property checks
157 static_assert(
158  rmm::detail::polyfill::async_resource_with<system_memory_resource, cuda::mr::device_accessible>);
159 static_assert(
160  rmm::detail::polyfill::async_resource_with<system_memory_resource, cuda::mr::host_accessible>);
161 /** @} */  // end of group
162 } // namespace mr
163 } // namespace RMM_NAMESPACE
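
A minimal usage sketch (not part of the header above; the header paths, stream, and sizes are illustrative assumptions): construct a system_memory_resource, allocate and deallocate through the device_memory_resource interface, and handle the exception thrown when SAM is not supported on the current device.

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/system_memory_resource.hpp>

#include <cstddef>
#include <cstring>
#include <iostream>

int main()
{
  try {
    // Construction throws if the device/driver combination does not support
    // system allocated memory (SAM).
    rmm::mr::system_memory_resource mr;

    rmm::cuda_stream_view stream{};  // default stream

    // Allocate 1 MiB; the returned pointer is accessible from both host and device.
    std::size_t const size = std::size_t{1} << 20;
    void* ptr              = mr.allocate(size, stream);

    // Host-accessible, so it can be written directly from CPU code.
    std::memset(ptr, 0, size);

    // Deallocation synchronizes the stream before freeing (see do_deallocate above).
    mr.deallocate(ptr, size, stream);
  } catch (std::exception const& e) {
    std::cerr << "system_memory_resource unavailable or allocation failed: " << e.what() << '\n';
  }
  return 0;
}
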
rmm::cuda_stream_view
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:28
rmm::mr::device_memory_resource
Base class for all librmm device memory allocation.
Definition: device_memory_resource.hpp:83
rmm::mr::system_memory_resource
device_memory_resource derived class that uses malloc/free for allocation/deallocation.
Definition: system_memory_resource.hpp:64
system_memory_resource(system_memory_resource const &)=default
Default copy constructor.
system_memory_resource(system_memory_resource &&)=default
Default move constructor.
system_memory_resource & operator=(system_memory_resource const &)=default
Default copy assignment operator.
friend void get_property(system_memory_resource const &, cuda::mr::device_accessible) noexcept
Enables the cuda::mr::device_accessible property.
Definition: system_memory_resource.hpp:146
friend void get_property(system_memory_resource const &, cuda::mr::host_accessible) noexcept
Enables the cuda::mr::host_accessible property.
Definition: system_memory_resource.hpp:153
system_memory_resource & operator=(system_memory_resource &&)=default
Default move assignment operator.
rmm::out_of_memory
Exception thrown when RMM runs out of memory.
Definition: error.hpp:76
cuda_device_id get_current_cuda_device()
Returns a cuda_device_id for the current device.
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
Default alignment used for CUDA memory allocation.
Definition: aligned.hpp:31
rmm::cuda_device_id
Strong type for a CUDA device identifier.
Definition: cuda_device.hpp:27
constexpr value_type value() const noexcept
The wrapped integer value.
Definition: cuda_device.hpp:43
static bool is_system_memory_supported(cuda_device_id device_id)
Check if system allocated memory (SAM) is supported on the specified device.
Definition: system_memory_resource.hpp:27
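
As a companion to the detail::is_system_memory_supported helper documented above, a standalone probe sketch (using only the CUDA runtime API; the function name supports_system_memory is a hypothetical illustration, not part of RMM) showing how an application might perform the same capability check before opting into this resource:

#include <cuda_runtime_api.h>

#include <iostream>

// Mirrors detail::is_system_memory_supported: query cudaDevAttrPageableMemoryAccess
// to learn whether the device can access pageable (system allocated) memory.
bool supports_system_memory(int device_id)
{
  int pageable_memory_access = 0;
  cudaError_t const status   = cudaDeviceGetAttribute(
    &pageable_memory_access, cudaDevAttrPageableMemoryAccess, device_id);
  return status == cudaSuccess && pageable_memory_access == 1;
}

int main()
{
  int device = 0;
  cudaGetDevice(&device);
  std::cout << "SAM supported on device " << device << ": "
            << (supports_system_memory(device) ? "yes" : "no") << '\n';
  return 0;
}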