// pinned_memory_resource.hpp
5 #pragma once
6 
7 #include <cstddef>
8 #include <memory>
9 
10 #include <cuda.h>
11 #include <cuda_runtime_api.h>
12 
13 #include <cuda/memory_resource>
14 
15 #include <rmm/aligned.hpp>
16 #include <rmm/cuda_device.hpp>
17 #include <rmm/cuda_stream_view.hpp>
18 #include <rmm/device_buffer.hpp>
19 
20 #include <rapidsmpf/config.hpp>
21 #include <rapidsmpf/error.hpp>
22 #include <rapidsmpf/memory/host_memory_resource.hpp>
23 #include <rapidsmpf/system_info.hpp>
24 #include <rapidsmpf/utils/misc.hpp>
25 
// NOLINTBEGIN(modernize-macro-to-enum)

/// Minimum CUDA driver and runtime version (12.6, encoded as 12060) required
/// for the pinned memory resource; checked by
/// `is_pinned_memory_resources_supported()`.
#define RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION 12060
/// Human-readable counterpart of the minimum CUDA version
/// (presumably used in diagnostics — verify at call sites).
#define RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION_STR "v12.6"

// NOLINTEND(modernize-macro-to-enum)
32 
33 namespace rapidsmpf {
34 
44  static const bool supported = [] {
45  // check if the device supports async memory pools
46  int cuda_pool_supported{};
47  auto attr_result = cudaDeviceGetAttribute(
48  &cuda_pool_supported,
49  cudaDevAttrMemoryPoolsSupported,
51  );
52  if (attr_result != cudaSuccess || cuda_pool_supported != 1) {
53  return false;
54  }
55 
56  int cuda_driver_version{};
57  auto driver_result = cudaDriverGetVersion(&cuda_driver_version);
58  int cuda_runtime_version{};
59  auto runtime_result = cudaRuntimeGetVersion(&cuda_runtime_version);
60  return driver_result == cudaSuccess && runtime_result == cudaSuccess
61  && cuda_driver_version >= RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION
62  && cuda_runtime_version >= RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION;
63  }();
64  return supported;
65 }
66 
    /// Initial size of the pool. The initial size is important for pinned
    /// memory performance — NOTE(review): the original rationale is truncated
    /// in the available documentation; confirm the details.
    std::size_t initial_pool_size = 0;

    /// Maximum size of the pool. `std::nullopt` means no limit.
    std::optional<std::size_t> max_pool_size = std::nullopt;
};
79 
  public:
    /// Sentinel value used to disable pinned host memory.
    static constexpr auto Disabled = nullptr;
106  int numa_id = get_current_numa_node(), PinnedPoolProperties pool_properties = {}
107  );
108 
    /**
     * @brief Create a pinned memory resource if the system supports pinned
     * memory.
     *
     * @param numa_id NUMA node to use; defaults to the calling thread's
     * current NUMA node.
     * @param pool_properties Pool configuration (initial and maximum size).
     * @return Shared pointer to the new resource — NOTE(review): presumably
     * `nullptr` when unsupported; confirm against the implementation.
     */
    static std::shared_ptr<PinnedMemoryResource> make_if_available(
        int numa_id = get_current_numa_node(), PinnedPoolProperties pool_properties = {}
    );
124 
    /**
     * @brief Construct a pinned memory resource from configuration options.
     *
     * @param options RapidsMPF configuration options.
     * @return Shared pointer to the resulting resource.
     */
    static std::shared_ptr<PinnedMemoryResource> from_options(config::Options options);
133 
    /// Destructor. The underlying pool is shared between copies (see the
    /// `pool_` member), so it is released with the last owner.
    ~PinnedMemoryResource() override;
135 
    /**
     * @brief Allocates pinned host memory associated with a CUDA stream.
     *
     * @param stream CUDA stream on which the allocation is ordered.
     * @param size Number of bytes to allocate.
     * @param alignment Alignment of the allocation; defaults to
     * `rmm::CUDA_ALLOCATION_ALIGNMENT`.
     * @return Pointer to the newly allocated pinned host memory.
     */
    void* allocate(
        rmm::cuda_stream_view stream,
        std::size_t size,
        std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT
    ) override;
152 
162  rmm::cuda_stream_view stream,
163  void* ptr,
164  std::size_t size,
165  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT
166  ) noexcept override;
167 
    /**
     * @brief Compares this resource to another resource.
     *
     * Two `PinnedMemoryResource` instances compare equal when they share the
     * same underlying pool (equality compares the `pool_` pointers).
     *
     * @param other Resource to compare against.
     * @return true if the two resources are equivalent.
     */
    [[nodiscard]] bool is_equal(HostMemoryResource const& other) const noexcept override;
178 
    /**
     * @brief Enables the `cuda::mr::device_accessible` property.
     *
     * Tag-invoke hook found by ADL; its presence advertises that memory from
     * this resource is accessible from the device. Intentionally empty.
     */
    friend void get_property(
        PinnedMemoryResource const&, cuda::mr::device_accessible
    ) noexcept {}
187 
188  private:
189  // We cannot assign cuda::pinned_memory_pool directly to device_async_resource_ref /
190  // host_async_resource_ref: the ref only stores a pointer, but its constructor
191  // requires the referenced type to be copyable and movable (CCCL __basic_any_ref
192  // constraint). pinned_memory_pool is not copyable, so we wrap it in
193  // PinnedMemoryResource, which holds the pool in a shared_resource and is copyable and
194  // movable. Copies share the same pool (is_equal compares pool_ pointers).
195  cuda::mr::shared_resource<cuda::pinned_memory_pool> pool_;
196 };
197 
// Compile-time checks: PinnedMemoryResource must model the CCCL resource
// concept and advertise both host and device accessibility.
static_assert(cuda::mr::resource<PinnedMemoryResource>);
static_assert(cuda::mr::resource_with<PinnedMemoryResource, cuda::mr::host_accessible>);
static_assert(cuda::mr::resource_with<PinnedMemoryResource, cuda::mr::device_accessible>);
201 
202 } // namespace rapidsmpf
Host memory resource using standard CPU allocation.
Memory resource that provides pinned (page-locked) host memory using a pool.
static std::shared_ptr< PinnedMemoryResource > from_options(config::Options options)
Construct from configuration options.
static std::shared_ptr< PinnedMemoryResource > make_if_available(int numa_id=get_current_numa_node(), PinnedPoolProperties pool_properties={})
Create a pinned memory resource if the system supports pinned memory.
bool is_equal(HostMemoryResource const &other) const noexcept override
Compares this resource to another resource.
static constexpr auto Disabled
Sentinel value used to disable pinned host memory.
void * allocate(rmm::cuda_stream_view stream, std::size_t size, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT) override
Allocates pinned host memory associated with a CUDA stream.
void deallocate(rmm::cuda_stream_view stream, void *ptr, std::size_t size, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept override
Deallocates pinned host memory associated with a CUDA stream.
friend void get_property(PinnedMemoryResource const &, cuda::mr::device_accessible) noexcept
Enables the cuda::mr::device_accessible property.
PinnedMemoryResource(int numa_id=get_current_numa_node(), PinnedPoolProperties pool_properties={})
Construct a pinned (page-locked) host memory resource.
Manages configuration options for RapidsMPF operations.
Definition: config.hpp:140
cuda_device_id get_current_cuda_device()
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
RAPIDS Multi-Processor interfaces.
Definition: backend.hpp:13
bool is_pinned_memory_resources_supported()
Checks if the PinnedMemoryResource is supported for the current CUDA version.
int get_current_numa_node() noexcept
Get the NUMA node ID associated with the calling CPU thread.
Properties for configuring a pinned memory pool.
std::optional< std::size_t > max_pool_size
Maximum size of the pool. std::nullopt means no limit.
std::size_t initial_pool_size
Initial size of the pool. Initial size is important for pinned memory performance,...