system_memory_resource.hpp
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
7 #include <rmm/cuda_device.hpp>
9 #include <rmm/detail/error.hpp>
10 #include <rmm/detail/export.hpp>
11 #include <rmm/detail/format.hpp>
13 
14 #include <cstddef>
15 #include <string>
16 
17 namespace RMM_NAMESPACE {
18 namespace mr {
19 
20 namespace detail {
27 static bool is_system_memory_supported(cuda_device_id device_id)
28 {
29  // Check if pageable memory access is supported
30  int pageableMemoryAccess;
31  RMM_CUDA_TRY(cudaDeviceGetAttribute(
32  &pageableMemoryAccess, cudaDevAttrPageableMemoryAccess, device_id.value()));
33  return pageableMemoryAccess == 1;
34 }
35 } // namespace detail
36 
64 class system_memory_resource : public device_memory_resource {
65  public:
66  system_memory_resource()
67  {
68  RMM_EXPECTS(rmm::mr::detail::is_system_memory_supported(rmm::get_current_cuda_device()),
69  "System memory allocator is not supported with this hardware/software version.");
70  }
71  ~system_memory_resource() override = default;
72  system_memory_resource(system_memory_resource const&) = default;
73  system_memory_resource(system_memory_resource&&) = default;
74  system_memory_resource& operator=(system_memory_resource const&) =
75  default;
76  system_memory_resource& operator=(system_memory_resource&&) =
77  default;
78 
79  private:
91  void* do_allocate(std::size_t bytes, [[maybe_unused]] cuda_stream_view stream) override
92  {
93  try {
94  return rmm::detail::aligned_host_allocate(
95  bytes, CUDA_ALLOCATION_ALIGNMENT, [](std::size_t size) { return ::operator new(size); });
96  } catch (std::bad_alloc const& e) {
97  auto const msg = std::string("Failed to allocate ") + rmm::detail::format_bytes(bytes) +
98  std::string("of memory: ") + e.what();
99  RMM_FAIL(msg.c_str(), rmm::out_of_memory);
100  }
101  }
102 
113  void do_deallocate(void* ptr,
114  [[maybe_unused]] std::size_t bytes,
115  cuda_stream_view stream) noexcept override
116  {
117  // With `cudaFree`, the CUDA runtime keeps track of dependent operations and does implicit
118  // synchronization. However, with SAM, since `free` is immediate, we need to wait for in-flight
119  // CUDA operations to finish before freeing the memory, to avoid potential use-after-free errors
120  // or race conditions.
121  stream.synchronize();
122 
123  rmm::detail::aligned_host_deallocate(
124  ptr, bytes, CUDA_ALLOCATION_ALIGNMENT, [](void* ptr) { ::operator delete(ptr); });
125  }
126 
137  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
138  {
139  return dynamic_cast<system_memory_resource const*>(&other) != nullptr;
140  }
146  friend void get_property(system_memory_resource const&, cuda::mr::device_accessible) noexcept {}
147 
153  friend void get_property(system_memory_resource const&, cuda::mr::host_accessible) noexcept {}
154 };
155 
156 // static property checks
157 static_assert(
158  rmm::detail::polyfill::async_resource_with<system_memory_resource, cuda::mr::device_accessible>);
159 static_assert(
160  rmm::detail::polyfill::async_resource_with<system_memory_resource, cuda::mr::host_accessible>);
161 /** @} */  // end of group
162 } // namespace mr
163 } // namespace RMM_NAMESPACE
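
A minimal usage sketch (not part of the header above; the header paths, stream, and sizes are illustrative assumptions): construct a system_memory_resource, allocate and deallocate through the device_memory_resource interface, and handle the exception thrown when SAM is not supported on the current device.

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/system_memory_resource.hpp>

#include <cstddef>
#include <cstring>
#include <iostream>

int main()
{
  try {
    // Construction throws if the device/driver combination does not support
    // system allocated memory (SAM).
    rmm::mr::system_memory_resource mr;

    rmm::cuda_stream_view stream{};  // default stream

    // Allocate 1 MiB; the returned pointer is accessible from both host and device.
    std::size_t const size = std::size_t{1} << 20;
    void* ptr              = mr.allocate(size, stream);

    // Host-accessible, so it can be written directly from CPU code.
    std::memset(ptr, 0, size);

    // Deallocation synchronizes the stream before freeing (see do_deallocate above).
    mr.deallocate(ptr, size, stream);
  } catch (std::exception const& e) {
    std::cerr << "system_memory_resource unavailable or allocation failed: " << e.what() << '\n';
  }
  return 0;
}
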
rmm::cuda_stream_view
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:28
rmm::mr::device_memory_resource
Base class for all librmm device memory allocation.
Definition: device_memory_resource.hpp:83
rmm::mr::system_memory_resource
device_memory_resource derived class that uses malloc/free for allocation/deallocation.
Definition: system_memory_resource.hpp:64
system_memory_resource(system_memory_resource const &)=default
Default copy constructor.
system_memory_resource(system_memory_resource &&)=default
Default move constructor.
system_memory_resource & operator=(system_memory_resource const &)=default
Default copy assignment operator.
friend void get_property(system_memory_resource const &, cuda::mr::device_accessible) noexcept
Enables the cuda::mr::device_accessible property.
Definition: system_memory_resource.hpp:146
friend void get_property(system_memory_resource const &, cuda::mr::host_accessible) noexcept
Enables the cuda::mr::host_accessible property.
Definition: system_memory_resource.hpp:153
system_memory_resource & operator=(system_memory_resource &&)=default
Default move assignment operator.
rmm::out_of_memory
Exception thrown when RMM runs out of memory.
Definition: error.hpp:76
cuda_device_id get_current_cuda_device()
Returns a cuda_device_id for the current device.
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
Default alignment used for CUDA memory allocation.
Definition: aligned.hpp:31
rmm::cuda_device_id
Strong type for a CUDA device identifier.
Definition: cuda_device.hpp:27
constexpr value_type value() const noexcept
The wrapped integer value.
Definition: cuda_device.hpp:43
static bool is_system_memory_supported(cuda_device_id device_id)
Check if system allocated memory (SAM) is supported on the specified device.
Definition: system_memory_resource.hpp:27
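
As a companion to the detail::is_system_memory_supported helper documented above, a standalone probe sketch (using only the CUDA runtime API; the function name supports_system_memory is a hypothetical illustration, not part of RMM) showing how an application might perform the same capability check before opting into this resource:

#include <cuda_runtime_api.h>

#include <iostream>

// Mirrors detail::is_system_memory_supported: query cudaDevAttrPageableMemoryAccess
// to learn whether the device can access pageable (system allocated) memory.
bool supports_system_memory(int device_id)
{
  int pageable_memory_access = 0;
  cudaError_t const status   = cudaDeviceGetAttribute(
    &pageable_memory_access, cudaDevAttrPageableMemoryAccess, device_id);
  return status == cudaSuccess && pageable_memory_access == 1;
}

int main()
{
  int device = 0;
  cudaGetDevice(&device);
  std::cout << "SAM supported on device " << device << ": "
            << (supports_system_memory(device) ? "yes" : "no") << '\n';
  return 0;
}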