librmm/26.06/system__memory__resource_8hpp_source

 /*

  * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.

  * SPDX-License-Identifier: Apache-2.0

  */

 #pragma once


 #include <rmm/aligned.hpp>

 #include <rmm/cuda_device.hpp>

 #include <rmm/detail/aligned.hpp>

 #include <rmm/detail/error.hpp>

 #include <rmm/detail/export.hpp>

 #include <rmm/detail/format.hpp>


 #include <cuda/memory_resource>

 #include <cuda/stream_ref>

 #include <cuda_runtime_api.h>


 #include <cstddef>

 #include <string>


 namespace RMM_NAMESPACE {

 namespace mr {


 namespace detail {

 /**
  * @brief Check if system allocated memory (SAM) is supported on the specified device.
  *
  * Queries the `cudaDevAttrPageableMemoryAccess` attribute of the device through
  * `cudaDeviceGetAttribute`. A failing CUDA call is handled by `RMM_CUDA_TRY`.
  *
  * @param device_id The device whose attribute is queried
  * @return true if the attribute value equals 1, false otherwise
  */
 static bool is_system_memory_supported(cuda_device_id device_id)
 {
   // Value-initialised so the variable never holds an indeterminate value.
   int pageable_access{};
   auto const device = device_id.value();
   RMM_CUDA_TRY(
     cudaDeviceGetAttribute(&pageable_access, cudaDevAttrPageableMemoryAccess, device));
   return pageable_access == 1;
 }

 }  // namespace detail


 /**
  * @brief Memory resource backed by system allocated memory (SAM).
  *
  * Memory is obtained from `::operator new` through `rmm::detail::aligned_host_allocate` and
  * released with `::operator delete` through `rmm::detail::aligned_host_deallocate`. The resource
  * advertises both the `cuda::mr::device_accessible` and `cuda::mr::host_accessible` properties.
  *
  * The class has no data members, so every instance compares equal to every other instance.
  */
 class system_memory_resource final {
  public:
   /**
    * @brief Constructs the resource after verifying SAM support on the current device.
    *
    * The check is performed with `RMM_EXPECTS`, which reports a failure if
    * `detail::is_system_memory_supported` returns false for the current CUDA device.
    */
   system_memory_resource()
   {
     RMM_EXPECTS(rmm::mr::detail::is_system_memory_supported(rmm::get_current_cuda_device()),
                 "System memory allocator is not supported with this hardware/software version.");
   }

   ~system_memory_resource()                             = default;
   system_memory_resource(system_memory_resource const&) = default;  ///< Default copy constructor
   system_memory_resource(system_memory_resource&&)      = default;  ///< Default move constructor

   /// @brief Default copy assignment operator.
   system_memory_resource& operator=(system_memory_resource const&) = default;

   /// @brief Default move assignment operator.
   system_memory_resource& operator=(system_memory_resource&&) = default;

   /**
    * @brief Allocates memory of size at least `bytes`.
    *
    * The allocation is always aligned to `rmm::CUDA_ALLOCATION_ALIGNMENT`; the `stream` and
    * `alignment` arguments are accepted for interface compatibility but are not used.
    *
    * @param stream Unused
    * @param bytes The size, in bytes, of the allocation
    * @param alignment Unused
    * @return Pointer to the newly allocated memory
    *
    * A `std::bad_alloc` raised by the underlying allocation is converted into an
    * `rmm::out_of_memory` failure via `RMM_FAIL`.
    */
   void* allocate([[maybe_unused]] cuda::stream_ref stream,
                  std::size_t bytes,
                  [[maybe_unused]] std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT)
   {
     try {
       return rmm::detail::aligned_host_allocate(
         bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, [](std::size_t size) {
           return ::operator new(size);
         });
     } catch (std::bad_alloc const& e) {
       // Note the leading space in " of memory: ": the formatted byte count is expected to end
       // with its unit and no trailing whitespace, so the separator has to be supplied here.
       auto const msg = std::string("Failed to allocate ") + rmm::detail::format_bytes(bytes) +
                        std::string(" of memory: ") + e.what();
       RMM_FAIL(msg.c_str(), rmm::out_of_memory);
     }
   }

   /**
    * @brief Deallocate memory pointed to by `ptr`.
    *
    * Synchronizes `stream` before releasing the memory.
    *
    * @param stream Stream that is synchronized before the memory is freed
    * @param ptr Pointer to be deallocated
    * @param bytes Size, in bytes, forwarded to `rmm::detail::aligned_host_deallocate`
    * @param alignment Unused; `rmm::CUDA_ALLOCATION_ALIGNMENT` is always used
    */
   void deallocate(cuda::stream_ref stream,
                   void* ptr,
                   std::size_t bytes,
                   [[maybe_unused]] std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept
   {
     // With `cudaFree`, the CUDA runtime keeps track of dependent operations and does implicit
     // synchronization. However, with SAM, since `free` is immediate, we need to wait for in-flight
     // CUDA operations to finish before freeing the memory, to avoid potential use-after-free errors
     // or race conditions.
     RMM_ASSERT_CUDA_SUCCESS_SAFE_SHUTDOWN(cudaStreamSynchronize(stream.get()));

     // The lambda parameter is named differently from `ptr` to avoid shadowing the argument.
     rmm::detail::aligned_host_deallocate(
       ptr, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, [](void* raw) { ::operator delete(raw); });
   }

   /**
    * @brief Allocates memory of size at least `bytes` synchronously.
    *
    * Forwards to `allocate` with the null stream and then synchronizes that stream.
    *
    * @param bytes The size, in bytes, of the allocation
    * @param alignment Forwarded to `allocate`, which does not use it
    * @return Pointer to the newly allocated memory
    */
   void* allocate_sync(std::size_t bytes, std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT)
   {
     auto* ptr = allocate(cuda::stream_ref{cudaStream_t{nullptr}}, bytes, alignment);
     RMM_CUDA_TRY(cudaStreamSynchronize(cudaStream_t{nullptr}));
     return ptr;
   }

   /**
    * @brief Deallocate memory pointed to by `ptr` synchronously.
    *
    * Forwards to `deallocate` with the null stream.
    *
    * @param ptr Pointer to be deallocated
    * @param bytes Size, in bytes, of the allocation
    * @param alignment Forwarded to `deallocate`, which does not use it
    */
   void deallocate_sync(void* ptr,
                        std::size_t bytes,
                        std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept
   {
     deallocate(cuda::stream_ref{cudaStream_t{nullptr}}, ptr, bytes, alignment);
   }

   /// @brief Enables the `cuda::mr::device_accessible` property.
   RMM_CONSTEXPR_FRIEND void get_property(system_memory_resource const&,
                                          cuda::mr::device_accessible) noexcept
   {
   }

   /// @brief Enables the `cuda::mr::host_accessible` property.
   RMM_CONSTEXPR_FRIEND void get_property(system_memory_resource const&,
                                          cuda::mr::host_accessible) noexcept
   {
   }

   /**
    * @brief Compare this resource to another.
    *
    * @return Always true
    */
   [[nodiscard]] bool operator==(system_memory_resource const&) const noexcept { return true; }

   /**
    * @brief Compare this resource to another.
    *
    * @return Always false
    */
   [[nodiscard]] bool operator!=(system_memory_resource const&) const noexcept { return false; }
 };


 // Compile-time checks that system_memory_resource models the cuda::mr resource concepts.

 // Synchronous interface (allocate_sync / deallocate_sync), with and without properties.
 static_assert(cuda::mr::synchronous_resource<system_memory_resource>);
 static_assert(
   cuda::mr::synchronous_resource_with<system_memory_resource, cuda::mr::device_accessible>);
 static_assert(
   cuda::mr::synchronous_resource_with<system_memory_resource, cuda::mr::host_accessible>);

 // Stream-ordered interface (allocate / deallocate), with and without properties.
 static_assert(cuda::mr::resource<system_memory_resource>);
 static_assert(cuda::mr::resource_with<system_memory_resource, cuda::mr::device_accessible>);
 static_assert(cuda::mr::resource_with<system_memory_resource, cuda::mr::host_accessible>);
 // end of group

 }  // namespace mr

 }  // namespace RMM_NAMESPACE

aligned.hpp

rmm::mr::system_memory_resource
Memory resource that uses system allocated memory, obtained with ::operator new and released with ::operator delete, for allocation/deallocation.
Definition: system_memory_resource.hpp:68

rmm::mr::system_memory_resource::system_memory_resource
system_memory_resource(system_memory_resource const &)=default
Default copy constructor.

rmm::mr::system_memory_resource::deallocate
void deallocate(cuda::stream_ref stream, void *ptr, std::size_t bytes, [[maybe_unused]] std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept
Deallocate memory pointed to by ptr.
Definition: system_memory_resource.hpp:122

rmm::mr::system_memory_resource::system_memory_resource
system_memory_resource(system_memory_resource &&)=default
Default move constructor.

rmm::mr::system_memory_resource::allocate
void * allocate([[maybe_unused]] cuda::stream_ref stream, std::size_t bytes, [[maybe_unused]] std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT)
Allocates memory of size at least bytes.
Definition: system_memory_resource.hpp:95

rmm::mr::system_memory_resource::operator=
system_memory_resource & operator=(system_memory_resource const &)=default
Default copy assignment operator.

rmm::mr::system_memory_resource::operator==
bool operator==(system_memory_resource const &) const noexcept
Compare this resource to another.
Definition: system_memory_resource.hpp:192

rmm::mr::system_memory_resource::get_property
friend void get_property(system_memory_resource const &, cuda::mr::device_accessible) noexcept
Enables the cuda::mr::device_accessible property.
Definition: system_memory_resource.hpp:170

rmm::mr::system_memory_resource::operator!=
bool operator!=(system_memory_resource const &) const noexcept
Compare this resource to another.
Definition: system_memory_resource.hpp:197

rmm::mr::system_memory_resource::allocate_sync
void * allocate_sync(std::size_t bytes, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT)
Allocates memory of size at least bytes synchronously.
Definition: system_memory_resource.hpp:144

rmm::mr::system_memory_resource::deallocate_sync
void deallocate_sync(void *ptr, std::size_t bytes, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept
Deallocate memory pointed to by ptr synchronously.
Definition: system_memory_resource.hpp:158

rmm::mr::system_memory_resource::get_property
friend void get_property(system_memory_resource const &, cuda::mr::host_accessible) noexcept
Enables the cuda::mr::host_accessible property.
Definition: system_memory_resource.hpp:180

rmm::mr::system_memory_resource::operator=
system_memory_resource & operator=(system_memory_resource &&)=default
Default move assignment operator.

rmm::out_of_memory
Exception thrown when RMM runs out of memory.
Definition: error.hpp:76

cuda_device.hpp

rmm::get_current_cuda_device
cuda_device_id get_current_cuda_device()
Returns a cuda_device_id for the current device.

rmm::CUDA_ALLOCATION_ALIGNMENT
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
Default alignment used for CUDA memory allocation.
Definition: aligned.hpp:25

rmm::cuda_device_id
Strong type for a CUDA device identifier.
Definition: cuda_device.hpp:27

rmm::cuda_device_id::value
constexpr value_type value() const noexcept
The wrapped integer value.
Definition: cuda_device.hpp:43

rmm::mr::detail::is_system_memory_supported
static bool is_system_memory_supported(cuda_device_id device_id)
Check if system allocated memory (SAM) is supported on the specified device.
Definition: system_memory_resource.hpp:31