pinned_memory_resource.hpp
1 
5 #pragma once
6 
7 #include <cstddef>
8 #include <functional>
9 #include <memory>
10 
11 #include <cuda.h>
12 #include <cuda_runtime_api.h>
13 
14 #include <cuda/memory_resource>
15 
16 #include <rmm/aligned.hpp>
17 #include <rmm/cuda_device.hpp>
18 #include <rmm/cuda_stream_view.hpp>
19 #include <rmm/device_buffer.hpp>
20 
21 #include <rapidsmpf/config.hpp>
22 #include <rapidsmpf/error.hpp>
23 #include <rapidsmpf/memory/host_memory_resource.hpp>
24 #include <rapidsmpf/rmm_resource_adaptor.hpp>
25 #include <rapidsmpf/system_info.hpp>
26 #include <rapidsmpf/utils/misc.hpp>
27 
29 // NOLINTBEGIN(modernize-macro-to-enum)
30 #define RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION 12060
31 #define RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION_STR "v12.6"
32 
33 // NOLINTEND(modernize-macro-to-enum)
34 
35 namespace rapidsmpf {
36 
46  static const bool supported = [] {
47  // check if the device supports async memory pools
48  int cuda_pool_supported{};
49  auto attr_result = cudaDeviceGetAttribute(
50  &cuda_pool_supported,
51  cudaDevAttrMemoryPoolsSupported,
53  );
54  if (attr_result != cudaSuccess || cuda_pool_supported != 1) {
55  return false;
56  }
57 
58  int cuda_driver_version{};
59  auto driver_result = cudaDriverGetVersion(&cuda_driver_version);
60  int cuda_runtime_version{};
61  auto runtime_result = cudaRuntimeGetVersion(&cuda_runtime_version);
62  return driver_result == cudaSuccess && runtime_result == cudaSuccess
63  && cuda_driver_version >= RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION
64  && cuda_runtime_version >= RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION;
65  }();
66  return supported;
67 }
68 
76  std::size_t initial_pool_size = 0;
77 
79  std::optional<std::size_t> max_pool_size = std::nullopt;
80 };
81 
90  public:
92  static constexpr auto Disabled = nullptr;
93 
108  int numa_id = get_current_numa_node(), PinnedPoolProperties pool_properties = {}
109  );
110 
123  static std::shared_ptr<PinnedMemoryResource> make_if_available(
124  int numa_id = get_current_numa_node(), PinnedPoolProperties pool_properties = {}
125  );
126 
134  static std::shared_ptr<PinnedMemoryResource> from_options(config::Options options);
135 
136  ~PinnedMemoryResource() override;
137 
149  void* allocate(
150  rmm::cuda_stream_view stream,
151  std::size_t size,
152  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT
153  ) override;
154 
164  rmm::cuda_stream_view stream,
165  void* ptr,
166  std::size_t size,
167  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT
168  ) noexcept override;
169 
179  [[nodiscard]] bool is_equal(HostMemoryResource const& other) const noexcept override;
180 
186  [[nodiscard]] std::int64_t current_allocated() const noexcept {
187  return pool_tracker_.current_allocated();
188  }
189 
196  return pool_tracker_.get_main_record();
197  }
198 
204  [[nodiscard]] constexpr PinnedPoolProperties const& properties() const noexcept {
205  return pool_properties_;
206  }
207 
215  [[nodiscard]] std::function<std::int64_t()> get_memory_available_cb() const;
216 
    /**
     * @brief Enables the cuda::mr::device_accessible property.
     *
     * Tag-dispatch hook for the libcu++ property system: the presence of this
     * overload is what makes
     * `cuda::mr::resource_with<PinnedMemoryResource, cuda::mr::device_accessible>`
     * hold; the empty body is intentional.
     */
    friend void get_property(
        PinnedMemoryResource const&, cuda::mr::device_accessible
    ) noexcept {}
225 
226  private:
227  PinnedPoolProperties pool_properties_;
228 
229  // The cuda::pinned_memory_pool is moved into the RmmResourceAdaptor's primary_mr
230  // (an any_resource<device_accessible>), which keeps it alive. RmmResourceAdaptor is
231  // itself a shared_resource<RmmResourceAdaptorImpl>, so copies of PinnedMemoryResource
232  // share the same underlying pool and adaptor state (memory statistics). Copies are
233  // equal iff they share the same RmmResourceAdaptorImpl (is_equal compares
234  // pool_tracker_).
235  RmmResourceAdaptor pool_tracker_;
236 };
237 
// Compile-time checks that PinnedMemoryResource models the libcu++
// memory-resource concept and advertises both host and device accessibility
// (the latter via the get_property friend hook declared in the class).
static_assert(cuda::mr::resource<PinnedMemoryResource>);
static_assert(cuda::mr::resource_with<PinnedMemoryResource, cuda::mr::host_accessible>);
static_assert(cuda::mr::resource_with<PinnedMemoryResource, cuda::mr::device_accessible>);
241 
242 } // namespace rapidsmpf
Host memory resource using standard CPU allocation.
Memory resource that provides pinned (page-locked) host memory using a pool.
ScopedMemoryRecord get_main_memory_record() const
Returns the main memory record for the pinned pool.
static std::shared_ptr< PinnedMemoryResource > from_options(config::Options options)
Construct from configuration options.
std::function< std::int64_t()> get_memory_available_cb() const
Returns a memory-availability callback for the pinned pool, if the pool has a configured maximum size.
std::int64_t current_allocated() const noexcept
Returns the total number of currently allocated bytes.
static std::shared_ptr< PinnedMemoryResource > make_if_available(int numa_id=get_current_numa_node(), PinnedPoolProperties pool_properties={})
Create a pinned memory resource if the system supports pinned memory.
bool is_equal(HostMemoryResource const &other) const noexcept override
Compares this resource to another resource.
static constexpr auto Disabled
Sentinel value used to disable pinned host memory.
void * allocate(rmm::cuda_stream_view stream, std::size_t size, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT) override
Allocates pinned host memory associated with a CUDA stream.
void deallocate(rmm::cuda_stream_view stream, void *ptr, std::size_t size, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept override
Deallocates pinned host memory associated with a CUDA stream.
friend void get_property(PinnedMemoryResource const &, cuda::mr::device_accessible) noexcept
Enables the cuda::mr::device_accessible property.
PinnedMemoryResource(int numa_id=get_current_numa_node(), PinnedPoolProperties pool_properties={})
Construct a pinned (page-locked) host memory resource.
constexpr PinnedPoolProperties const & properties() const noexcept
Returns the properties used to configure the pool.
A RMM memory resource adaptor tailored to RapidsMPF.
ScopedMemoryRecord get_main_record() const
Returns a copy of the main memory record.
std::int64_t current_allocated() const noexcept
Get the total current allocated memory from both primary and fallback.
Manages configuration options for RapidsMPF operations.
Definition: config.hpp:140
cuda_device_id get_current_cuda_device()
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
RAPIDS Multi-Processor interfaces.
Definition: backend.hpp:14
bool is_pinned_memory_resources_supported()
Checks if the PinnedMemoryResource is supported for the current CUDA version.
int get_current_numa_node() noexcept
Get the NUMA node ID associated with the calling CPU thread.
Properties for configuring a pinned memory pool.
std::optional< std::size_t > max_pool_size
Maximum size of the pool. std::nullopt means no limit.
std::size_t initial_pool_size
Initial size of the pool. Choosing a sufficient initial size is important for pinned memory performance.
Memory statistics for a specific scope.