system_memory_resource.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2024-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <rmm/cuda_device.hpp>
19 #include <rmm/cuda_stream_view.hpp>
20 #include <rmm/detail/error.hpp>
21 #include <rmm/detail/export.hpp>
22 #include <rmm/detail/format.hpp>
24 
25 #include <cstddef>
26 #include <string>
27 
28 namespace RMM_NAMESPACE {
29 namespace mr {
30 
31 namespace detail {
39 {
40  // Check if pageable memory access is supported
41  int pageableMemoryAccess;
42  RMM_CUDA_TRY(cudaDeviceGetAttribute(
43  &pageableMemoryAccess, cudaDevAttrPageableMemoryAccess, device_id.value()));
44  return pageableMemoryAccess == 1;
45 }
46 } // namespace detail
47 
76  public:
78  {
79  RMM_EXPECTS(rmm::mr::detail::is_system_memory_supported(rmm::get_current_cuda_device()),
80  "System memory allocator is not supported with this hardware/software version.");
81  }
82  ~system_memory_resource() override = default;
86  default;
88  default;
89 
90  private:
102  void* do_allocate(std::size_t bytes, [[maybe_unused]] cuda_stream_view stream) override
103  {
104  try {
105  return rmm::detail::aligned_host_allocate(
106  bytes, CUDA_ALLOCATION_ALIGNMENT, [](std::size_t size) { return ::operator new(size); });
107  } catch (std::bad_alloc const& e) {
108  auto const msg = std::string("Failed to allocate ") + rmm::detail::format_bytes(bytes) +
109  std::string("of memory: ") + e.what();
110  RMM_FAIL(msg.c_str(), rmm::out_of_memory);
111  }
112  }
113 
124  void do_deallocate(void* ptr,
125  [[maybe_unused]] std::size_t bytes,
126  cuda_stream_view stream) override
127  {
128  // With `cudaFree`, the CUDA runtime keeps track of dependent operations and does implicit
129  // synchronization. However, with SAM, since `free` is immediate, we need to wait for in-flight
130  // CUDA operations to finish before freeing the memory, to avoid potential use-after-free errors
131  // or race conditions.
132  stream.synchronize();
133 
134  rmm::detail::aligned_host_deallocate(
135  ptr, bytes, CUDA_ALLOCATION_ALIGNMENT, [](void* ptr) { ::operator delete(ptr); });
136  }
137 
148  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
149  {
150  return dynamic_cast<system_memory_resource const*>(&other) != nullptr;
151  }
157  friend void get_property(system_memory_resource const&, cuda::mr::device_accessible) noexcept {}
158 
164  friend void get_property(system_memory_resource const&, cuda::mr::host_accessible) noexcept {}
165 };
166 
// Compile-time verification: the build fails unless system_memory_resource satisfies
// cuda::mr::async_resource_with for both the device_accessible and host_accessible properties.
167 // static property checks
168 static_assert(cuda::mr::async_resource_with<system_memory_resource, cuda::mr::device_accessible>);
169 static_assert(cuda::mr::async_resource_with<system_memory_resource, cuda::mr::host_accessible>); // end of group
171 } // namespace mr
172 } // namespace RMM_NAMESPACE
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:39
void synchronize() const
Synchronize the viewed CUDA stream.
Base class for all librmm device memory allocation.
Definition: device_memory_resource.hpp:92
device_memory_resource derived class that uses malloc/free for allocation/deallocation.
Definition: system_memory_resource.hpp:75
system_memory_resource(system_memory_resource const &)=default
Default copy constructor.
system_memory_resource(system_memory_resource &&)=default
Default move constructor.
system_memory_resource & operator=(system_memory_resource const &)=default
Default copy assignment operator.
friend void get_property(system_memory_resource const &, cuda::mr::device_accessible) noexcept
Enables the cuda::mr::device_accessible property.
Definition: system_memory_resource.hpp:157
friend void get_property(system_memory_resource const &, cuda::mr::host_accessible) noexcept
Enables the cuda::mr::host_accessible property.
Definition: system_memory_resource.hpp:164
system_memory_resource & operator=(system_memory_resource &&)=default
Default move assignment operator.
Exception thrown when RMM runs out of memory.
Definition: error.hpp:87
cuda_device_id get_current_cuda_device()
Returns a cuda_device_id for the current device.
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
Default alignment used for CUDA memory allocation.
Definition: aligned.hpp:43
Strong type for a CUDA device identifier.
Definition: cuda_device.hpp:38
constexpr value_type value() const noexcept
The wrapped integer value.
Definition: cuda_device.hpp:54
static bool is_system_memory_supported(cuda_device_id device_id)
Check if system allocated memory (SAM) is supported on the specified device.
Definition: system_memory_resource.hpp:38