system_memory_resource.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
7 #include <rmm/aligned.hpp>
8 #include <rmm/cuda_device.hpp>
9 #include <rmm/detail/aligned.hpp>
10 #include <rmm/detail/error.hpp>
11 #include <rmm/detail/export.hpp>
12 #include <rmm/detail/format.hpp>
13 
14 #include <cuda/memory_resource>
15 #include <cuda/stream_ref>
16 #include <cuda_runtime_api.h>
17 
18 #include <cstddef>
19 #include <string>
20 
21 namespace RMM_NAMESPACE {
22 namespace mr {
23 
24 namespace detail {
32 {
33  // Check if pageable memory access is supported
34  int pageableMemoryAccess;
35  RMM_CUDA_TRY(cudaDeviceGetAttribute(
36  &pageableMemoryAccess, cudaDevAttrPageableMemoryAccess, device_id.value()));
37  return pageableMemoryAccess == 1;
38 }
39 } // namespace detail
40 
69  public:
71  {
72  RMM_EXPECTS(rmm::mr::detail::is_system_memory_supported(rmm::get_current_cuda_device()),
73  "System memory allocator is not supported with this hardware/software version.");
74  }
75  ~system_memory_resource() = default;
79  default;
81  default;
82 
95  void* allocate([[maybe_unused]] cuda::stream_ref stream,
96  std::size_t bytes,
97  [[maybe_unused]] std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT)
98  {
99  try {
100  return rmm::detail::aligned_host_allocate(
101  bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, [](std::size_t size) {
102  return ::operator new(size);
103  });
104  } catch (std::bad_alloc const& e) {
105  auto const msg = std::string("Failed to allocate ") + rmm::detail::format_bytes(bytes) +
106  std::string("of memory: ") + e.what();
107  RMM_FAIL(msg.c_str(), rmm::out_of_memory);
108  }
109  }
110 
122  void deallocate(cuda::stream_ref stream,
123  void* ptr,
124  std::size_t bytes,
125  [[maybe_unused]] std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept
126  {
127  // With `cudaFree`, the CUDA runtime keeps track of dependent operations and does implicit
128  // synchronization. However, with SAM, since `free` is immediate, we need to wait for in-flight
129  // CUDA operations to finish before freeing the memory, to avoid potential use-after-free errors
130  // or race conditions.
131  RMM_ASSERT_CUDA_SUCCESS_SAFE_SHUTDOWN(cudaStreamSynchronize(stream.get()));
132 
133  rmm::detail::aligned_host_deallocate(
134  ptr, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, [](void* ptr) { ::operator delete(ptr); });
135  }
136 
144  void* allocate_sync(std::size_t bytes, std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT)
145  {
146  auto* ptr = allocate(cuda::stream_ref{cudaStream_t{nullptr}}, bytes, alignment);
147  RMM_CUDA_TRY(cudaStreamSynchronize(cudaStream_t{nullptr}));
148  return ptr;
149  }
150 
158  void deallocate_sync(void* ptr,
159  std::size_t bytes,
160  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept
161  {
162  deallocate(cuda::stream_ref{cudaStream_t{nullptr}}, ptr, bytes, alignment);
163  }
164 
170  RMM_CONSTEXPR_FRIEND void get_property(system_memory_resource const&,
171  cuda::mr::device_accessible) noexcept
172  {
173  }
174 
180  RMM_CONSTEXPR_FRIEND void get_property(system_memory_resource const&,
181  cuda::mr::host_accessible) noexcept
182  {
183  }
184 
192  [[nodiscard]] bool operator==(system_memory_resource const&) const noexcept { return true; }
193 
197  [[nodiscard]] bool operator!=(system_memory_resource const&) const noexcept { return false; }
198 };
199 
200 // static property checks
201 static_assert(cuda::mr::synchronous_resource<system_memory_resource>);
202 static_assert(cuda::mr::resource<system_memory_resource>);
203 static_assert(
204  cuda::mr::synchronous_resource_with<system_memory_resource, cuda::mr::device_accessible>);
205 static_assert(
206  cuda::mr::synchronous_resource_with<system_memory_resource, cuda::mr::host_accessible>);
207 static_assert(cuda::mr::resource_with<system_memory_resource, cuda::mr::device_accessible>);
208 static_assert(cuda::mr::resource_with<system_memory_resource, cuda::mr::host_accessible>); // end of group
210 } // namespace mr
211 } // namespace RMM_NAMESPACE
Memory resource that uses malloc/free for allocation/deallocation.
Definition: system_memory_resource.hpp:68
system_memory_resource(system_memory_resource const &)=default
Default copy constructor.
void deallocate(cuda::stream_ref stream, void *ptr, std::size_t bytes, [[maybe_unused]] std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept
Deallocate memory pointed to by ptr.
Definition: system_memory_resource.hpp:122
system_memory_resource(system_memory_resource &&)=default
Default move constructor.
void * allocate([[maybe_unused]] cuda::stream_ref stream, std::size_t bytes, [[maybe_unused]] std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT)
Allocates memory of size at least bytes.
Definition: system_memory_resource.hpp:95
system_memory_resource & operator=(system_memory_resource const &)=default
Default copy assignment operator.
bool operator==(system_memory_resource const &) const noexcept
Compare this resource to another.
Definition: system_memory_resource.hpp:192
friend void get_property(system_memory_resource const &, cuda::mr::device_accessible) noexcept
Enables the cuda::mr::device_accessible property.
Definition: system_memory_resource.hpp:170
bool operator!=(system_memory_resource const &) const noexcept
Compare this resource to another.
Definition: system_memory_resource.hpp:197
void * allocate_sync(std::size_t bytes, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT)
Allocates memory of size at least bytes synchronously.
Definition: system_memory_resource.hpp:144
void deallocate_sync(void *ptr, std::size_t bytes, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept
Deallocate memory pointed to by ptr synchronously.
Definition: system_memory_resource.hpp:158
friend void get_property(system_memory_resource const &, cuda::mr::host_accessible) noexcept
Enables the cuda::mr::host_accessible property.
Definition: system_memory_resource.hpp:180
system_memory_resource & operator=(system_memory_resource &&)=default
Default move assignment operator.
Exception thrown when RMM runs out of memory.
Definition: error.hpp:76
cuda_device_id get_current_cuda_device()
Returns a cuda_device_id for the current device.
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
Default alignment used for CUDA memory allocation.
Definition: aligned.hpp:25
Strong type for a CUDA device identifier.
Definition: cuda_device.hpp:27
constexpr value_type value() const noexcept
The wrapped integer value.
Definition: cuda_device.hpp:43
static bool is_system_memory_supported(cuda_device_id device_id)
Check if system allocated memory (SAM) is supported on the specified device.
Definition: system_memory_resource.hpp:31