pinned_memory_resource.hpp
1 
5 #pragma once
6 
7 #include <cstddef>
8 #include <functional>
9 #include <memory>
10 
11 #include <cuda.h>
12 #include <cuda_runtime_api.h>
13 
14 #include <cuda/memory_resource>
15 
16 #include <rmm/aligned.hpp>
17 #include <rmm/cuda_device.hpp>
18 #include <rmm/cuda_stream_view.hpp>
19 #include <rmm/device_buffer.hpp>
20 
21 #include <rapidsmpf/config.hpp>
22 #include <rapidsmpf/detail/rmm_resource_adaptor_impl.hpp>
23 #include <rapidsmpf/error.hpp>
24 #include <rapidsmpf/system_info.hpp>
25 #include <rapidsmpf/utils/misc.hpp>
26 
28 // NOLINTBEGIN(modernize-macro-to-enum)
// Minimum CUDA version, in numeric form (12060) and as a display string
// ("v12.6"). The numeric form is the threshold that both the CUDA driver
// version and the CUDA runtime version are compared against in the
// pinned-memory support check further down in this header.
29 #define RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION 12060
30 #define RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION_STR "v12.6"
31 
32 // NOLINTEND(modernize-macro-to-enum)
33 
34 namespace rapidsmpf {
35 
45  static const bool supported = [] {
46  // check if the device supports async memory pools
47  int cuda_pool_supported{};
48  auto attr_result = cudaDeviceGetAttribute(
49  &cuda_pool_supported,
50  cudaDevAttrMemoryPoolsSupported,
52  );
53  if (attr_result != cudaSuccess || cuda_pool_supported != 1) {
54  return false;
55  }
56 
57  int cuda_driver_version{};
58  auto driver_result = cudaDriverGetVersion(&cuda_driver_version);
59  int cuda_runtime_version{};
60  auto runtime_result = cudaRuntimeGetVersion(&cuda_runtime_version);
61  return driver_result == cudaSuccess && runtime_result == cudaSuccess
62  && cuda_driver_version >= RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION
63  && cuda_runtime_version >= RAPIDSMPF_PINNED_MEM_RES_MIN_CUDA_VERSION;
64  }();
65  return supported;
66 }
67 
75  std::size_t initial_pool_size = 0;
76 
78  std::optional<std::size_t> max_pool_size = std::nullopt;
79 };
80 
94  : public cuda::mr::shared_resource<
95  detail::RmmResourceAdaptorImpl<cuda::pinned_memory_pool>> {
96  using shared_base = cuda::mr::shared_resource<
98 
99  public:
/// Sentinel value (std::nullopt) indicating that pinned host memory is disabled.
101  static constexpr std::nullopt_t Disabled = std::nullopt;
102 
/// Whether pinned host memory is enabled by default (currently `false`).
104  static constexpr bool EnabledByDefault = false;
105 
/// Default fraction of total host memory per GPU used as the initial pinned
/// pool size when no explicit value is configured. Expressed as a percentage
/// string; "0%" is the default.
/// NOTE(review): "Initi" looks like a typo for "Initial"; the name is public,
/// so renaming it would need a deprecation path — confirm before changing.
113  static constexpr std::string_view DefaultInitiPoolSizeFactor = "0%";
114 
/// Default fraction of total host memory per GPU used as the maximum pinned
/// pool size when no explicit value is configured. Expressed as a percentage
/// string; "80%" is the default.
123  static constexpr std::string_view DefaultMaxPoolSizeFactor = "80%";
124 
/**
 * @brief Create a pinned memory resource if the system supports pinned memory.
 *
 * @param numa_id NUMA node ID to use; defaults to get_current_numa_node(),
 * i.e. the node associated with the calling CPU thread.
 * @param pool_properties Pool configuration; defaults to a value-initialized
 * PinnedPoolProperties.
 * @return An engaged optional holding the resource, or (presumably) an empty
 * optional when pinned memory is not supported — the definition is not
 * visible here, confirm against the implementation.
 */
136  static std::optional<PinnedMemoryResource> make_if_available(
137  int numa_id = get_current_numa_node(), PinnedPoolProperties pool_properties = {}
138  );
139 
/**
 * @brief Construct from configuration options.
 *
 * @param options Configuration options (taken by value).
 * @return An optional PinnedMemoryResource. NOTE(review): presumably empty
 * when pinned memory is disabled or unsupported — the definition is not
 * visible here, confirm against the implementation.
 */
156  static std::optional<PinnedMemoryResource> from_options(config::Options options);
157 
/**
 * @brief Allocates pinned host memory associated with a CUDA stream.
 *
 * Forwards the request unchanged to the underlying resource returned by get().
 *
 * @param stream CUDA stream the allocation is associated with.
 * @param size Requested allocation size (presumably in bytes — confirm).
 * @param alignment Requested alignment; defaults to
 * rmm::CUDA_ALLOCATION_ALIGNMENT.
 * @return Pointer returned by the underlying resource's allocate().
 */
170  [[nodiscard]] void* allocate(
171  cuda::stream_ref stream,
172  std::size_t size,
173  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT
174  ) {
175  return get().allocate(stream, size, alignment);
176  }
177 
187  cuda::stream_ref stream,
188  void* ptr,
189  std::size_t size,
190  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT
191  ) noexcept {
192  get().deallocate(stream, ptr, size, alignment);
193  }
194 
/**
 * @brief Equality comparison.
 *
 * Two PinnedMemoryResource objects compare equal when the underlying
 * resources returned by their get() compare equal.
 *
 * @param other The resource to compare against.
 * @return Result of `get() == other.get()`.
 */
201  [[nodiscard]] bool operator==(PinnedMemoryResource const& other) const noexcept {
202  return get() == other.get();
203  }
204 
/**
 * @brief Returns the total number of currently allocated bytes.
 *
 * @return The value reported by the underlying resource's current_allocated().
 */
210  [[nodiscard]] std::int64_t current_allocated() const noexcept {
211  return get().current_allocated();
212  }
213 
220  return get().get_main_record();
221  }
222 
/**
 * @brief Returns the properties used to configure the pool.
 *
 * @return Const reference to the stored pool_properties_ member; the reference
 * is to a member of this object, so it must not outlive it.
 */
228  [[nodiscard]] constexpr PinnedPoolProperties const& properties() const noexcept {
229  return pool_properties_;
230  }
231 
/**
 * @brief Returns a memory-availability callback for the pinned pool.
 *
 * @return A callable returning std::int64_t. NOTE(review): the extracted docs
 * mention this applies when the pool has a configured maximum size; what is
 * returned when no maximum is set is not visible here — confirm against the
 * implementation.
 */
239  [[nodiscard]] std::function<std::int64_t()> get_memory_available_cb() const;
240 
/**
 * @brief Enables the cuda::mr::host_accessible property.
 *
 * Empty-bodied friend overload: its existence for the
 * (PinnedMemoryResource, cuda::mr::host_accessible) pair is what matters;
 * it performs no work when called.
 */
244  friend void get_property(
245  PinnedMemoryResource const&, cuda::mr::host_accessible
246  ) noexcept {}
247 
248  private:
260  int numa_id = get_current_numa_node(), PinnedPoolProperties pool_properties = {}
261  );
262 
/// Pool configuration held by this resource; exposed read-only via properties().
263  PinnedPoolProperties pool_properties_;
264 };
265 
// Compile-time checks: PinnedMemoryResource must satisfy the cuda::mr::resource
// concept and must advertise both the host_accessible and device_accessible
// properties. A failure here stops compilation of any file including this header.
266 static_assert(cuda::mr::resource<PinnedMemoryResource>);
267 static_assert(cuda::mr::resource_with<PinnedMemoryResource, cuda::mr::host_accessible>);
268 static_assert(cuda::mr::resource_with<PinnedMemoryResource, cuda::mr::device_accessible>);
269 
270 } // namespace rapidsmpf
Memory resource that provides pinned (page-locked) host memory using a pool.
static constexpr std::string_view DefaultInitiPoolSizeFactor
Fraction of total host memory per GPU used as the initial pinned pool size when no explicit pinned_in...
static constexpr std::nullopt_t Disabled
Sentinel value indicating that pinned host memory is disabled.
ScopedMemoryRecord get_main_memory_record() const
Returns the main memory record for the pinned pool.
std::function< std::int64_t()> get_memory_available_cb() const
Returns a memory-availability callback for the pinned pool, if the pool has a configured maximum size...
std::int64_t current_allocated() const noexcept
Returns the total number of currently allocated bytes.
bool operator==(PinnedMemoryResource const &other) const noexcept
Equality comparison.
static constexpr bool EnabledByDefault
Whether pinned host memory is enabled by default.
static std::optional< PinnedMemoryResource > from_options(config::Options options)
Construct from configuration options.
static constexpr std::string_view DefaultMaxPoolSizeFactor
Fraction of total host memory per GPU used as the maximum pinned pool size when no explicit pinned_ma...
static std::optional< PinnedMemoryResource > make_if_available(int numa_id=get_current_numa_node(), PinnedPoolProperties pool_properties={})
Create a pinned memory resource if the system supports pinned memory.
void * allocate(cuda::stream_ref stream, std::size_t size, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT)
Allocates pinned host memory associated with a CUDA stream.
constexpr PinnedPoolProperties const & properties() const noexcept
Returns the properties used to configure the pool.
void deallocate(cuda::stream_ref stream, void *ptr, std::size_t size, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept
Deallocates pinned host memory associated with a CUDA stream.
friend void get_property(PinnedMemoryResource const &, cuda::mr::host_accessible) noexcept
Enables the cuda::mr::host_accessible property.
Manages configuration options for RapidsMPF operations.
Definition: config.hpp:140
Implementation class for RmmResourceAdaptor.
cuda_device_id get_current_cuda_device()
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
RAPIDS Multi-Processor interfaces.
Definition: backend.hpp:14
bool is_pinned_memory_resources_supported()
Checks if the PinnedMemoryResource is supported: the device must support CUDA memory pools, and both the CUDA driver and runtime versions must be at least v12.6.
int get_current_numa_node() noexcept
Get the NUMA node ID associated with the calling CPU thread.
Properties for configuring a pinned memory pool.
std::optional< std::size_t > max_pool_size
Maximum size of the pool. std::nullopt means no limit.
std::size_t initial_pool_size
Initial size of the pool. Initial size is important for pinned memory performance,...
Memory statistics for a specific scope.