// pinned_memory_resource.hpp
5 #pragma once
6 
7 #include <cstddef>
8 #include <memory>
9 
10 #include <cuda.h>
11 #include <cuda_runtime_api.h>
12 
13 #include <cuda/memory_resource>
14 
15 #include <rmm/aligned.hpp>
16 #include <rmm/cuda_device.hpp>
17 #include <rmm/cuda_stream_view.hpp>
18 #include <rmm/device_buffer.hpp>
19 
20 #include <rapidsmpf/error.hpp>
21 #include <rapidsmpf/memory/host_memory_resource.hpp>
22 #include <rapidsmpf/system_info.hpp>
23 #include <rapidsmpf/utils.hpp>
24 
25 
27 // NOLINTBEGIN(modernize-macro-to-enum)
28 #define RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION 12060
29 #define RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION_STR "v12.6"
30 
31 // NOLINTEND(modernize-macro-to-enum)
32 
33 namespace rapidsmpf {
34 
40 inline bool is_pinned_memory_resources_supported() {
41  static const bool supported = [] {
42  // check if the device supports async memory pools
43  int cuda_pool_supported{};
44  auto attr_result = cudaDeviceGetAttribute(
45  &cuda_pool_supported,
46  cudaDevAttrMemoryPoolsSupported,
48  );
49  if (attr_result != cudaSuccess || cuda_pool_supported != 1) {
50  return false;
51  }
52 
53  int cuda_driver_version{};
54  auto driver_result = cudaDriverGetVersion(&cuda_driver_version);
55  int cuda_runtime_version{};
56  auto runtime_result = cudaRuntimeGetVersion(&cuda_runtime_version);
57  return driver_result == cudaSuccess && runtime_result == cudaSuccess
58  && cuda_driver_version >= RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION
59  && cuda_runtime_version >= RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION;
60  }();
61  return supported;
62 }
63 
64 class PinnedMemoryResource;
65 
74  public:
76  static constexpr auto Disabled = nullptr;
77 
90  PinnedMemoryResource(int numa_id = get_current_numa_node());
91 
103  static std::shared_ptr<PinnedMemoryResource> make_if_available(
104  int numa_id = get_current_numa_node()
105  );
106 
107  ~PinnedMemoryResource() override;
108 
120  void* allocate(
121  rmm::cuda_stream_view stream,
122  std::size_t size,
123  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT
124  ) override;
125 
135  rmm::cuda_stream_view stream,
136  void* ptr,
137  std::size_t size,
138  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT
139  ) noexcept override;
140 
150  [[nodiscard]] bool is_equal(HostMemoryResource const& other) const noexcept override;
151 
157  friend void get_property(
158  PinnedMemoryResource const&, cuda::mr::device_accessible
159  ) noexcept {}
160 
161  private:
162  // using PImpl idiom to hide cudax .cuh headers from rapidsmpf. cudax cuh headers will
163  // only be used by the impl in .cu file.
164  struct PinnedMemoryResourceImpl;
165  std::shared_ptr<PinnedMemoryResourceImpl> impl_;
166 };
167 
// Compile-time checks that PinnedMemoryResource satisfies the libcu++ memory
// resource concept and advertises both host and device accessibility.
static_assert(cuda::mr::resource<PinnedMemoryResource>);
static_assert(cuda::mr::resource_with<PinnedMemoryResource, cuda::mr::host_accessible>);
static_assert(cuda::mr::resource_with<PinnedMemoryResource, cuda::mr::device_accessible>);
171 
172 } // namespace rapidsmpf
/*
 * Extracted API summary (documentation-tooling residue appended after the
 * namespace; kept here as a comment so the header remains well-formed):
 *
 * - HostMemoryResource: host memory resource using standard CPU allocation.
 * - PinnedMemoryResource: memory resource that provides pinned (page-locked)
 *   host memory using a pool.
 * - bool is_equal(HostMemoryResource const& other) const noexcept override:
 *   compares this resource to another resource.
 * - PinnedMemoryResource(int numa_id = get_current_numa_node()): construct a
 *   pinned (page-locked) host memory resource.
 * - static constexpr auto Disabled: sentinel value used to disable pinned host
 *   memory.
 * - void* allocate(rmm::cuda_stream_view stream, std::size_t size,
 *   std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) override:
 *   allocates pinned host memory associated with a CUDA stream.
 * - void deallocate(rmm::cuda_stream_view stream, void* ptr, std::size_t size,
 *   std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept override:
 *   deallocates pinned host memory associated with a CUDA stream.
 * - friend void get_property(PinnedMemoryResource const&,
 *   cuda::mr::device_accessible) noexcept: enables the
 *   cuda::mr::device_accessible property.
 * - static std::shared_ptr<PinnedMemoryResource> make_if_available(
 *   int numa_id = get_current_numa_node()): create a pinned memory resource if
 *   the system supports pinned memory.
 * - cuda_device_id get_current_cuda_device()
 * - static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
 */