per_device_resource.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include <rmm/cuda_device.hpp>
9 #include <rmm/detail/export.hpp>
12 #include <rmm/resource_ref.hpp>
13 
14 #include <cuda/memory_resource>
15 
16 #include <map>
17 #include <mutex>
18 
85 namespace RMM_NAMESPACE {
86 namespace mr {
92 namespace detail {
93 
94 // These symbols must have default visibility so that when they are
95 // referenced in multiple different DSOs the linker correctly
96 // determines that there is only a single unique reference to the
97 // function symbols (and hence they return unique static references
98 // across different DSOs). See also
99 // https://github.com/rapidsai/rmm/issues/826
100 // Although currently the entire RMM namespace is RMM_EXPORT, we
101 // explicitly mark these functions as exported in case the namespace
102 // export changes.
111 {
112  static cuda_memory_resource mr{};
113  return &mr;
114 }
115 
119 RMM_EXPORT inline std::mutex& map_lock()
120 {
121  static std::mutex map_lock;
122  return map_lock;
123 }
124 
128 RMM_EXPORT inline auto& get_map()
129 {
130  static std::map<cuda_device_id::value_type, device_memory_resource*> device_id_to_resource;
131  return device_id_to_resource;
132 }
133 
137 RMM_EXPORT inline std::mutex& ref_map_lock()
138 {
139  static std::mutex ref_map_lock;
140  return ref_map_lock;
141 }
142 
143 // This symbol must have default visibility, see: https://github.com/rapidsai/rmm/issues/826
147 RMM_EXPORT inline auto& get_ref_map()
148 {
149  static std::map<cuda_device_id::value_type, cuda::mr::any_resource<cuda::mr::device_accessible>>
150  device_id_to_resource;
151  return device_id_to_resource;
152 }
153 
154 } // namespace detail
155 
179 {
180  std::lock_guard<std::mutex> lock{detail::map_lock()};
181  auto& map = detail::get_map();
182  // If a resource was never set for `id`, set to the initial resource
183  auto const found = map.find(device_id.value());
184  return (found == map.end()) ? (map[device_id.value()] = detail::initial_resource())
185  : found->second;
186 }
187 
188 namespace detail {
189 
190 // The non-thread-safe implementation of `set_per_device_resource_ref`. This exists because
191 // we need to call this function from two places: the thread-safe version of
192 // `set_per_device_resource_ref` and the thread-safe version of `set_per_device_resource`,
193 // both of which take the lock, so we need an implementation that doesn't take the lock.
195 inline device_async_resource_ref set_per_device_resource_ref_unsafe(
196  cuda_device_id device_id, device_async_resource_ref new_resource_ref)
197 {
198  using any_device_resource = cuda::mr::any_resource<cuda::mr::device_accessible>;
199  auto& map = detail::get_ref_map();
200  auto const old_itr = map.find(device_id.value());
201  // If a resource didn't previously exist for `device_id`, return ref to initial_resource
202  if (old_itr == map.end()) {
203  map.emplace(device_id.value(), static_cast<any_device_resource>(new_resource_ref));
204  return device_async_resource_ref{*detail::initial_resource()};
205  }
206 
207  device_async_resource_ref old_resource_ref{old_itr->second};
208  old_itr->second = static_cast<any_device_resource>(new_resource_ref); // reify and store
209  return old_resource_ref;
210 }
211 } // namespace detail
212 
241  device_memory_resource* new_mr)
242 {
243  std::lock_guard<std::mutex> lock{detail::map_lock()};
244 
245  // Note: even though set_per_device_resource() and set_per_device_resource_ref() are not
246  // interchangeable, we call the latter from the former to maintain resource_ref
247  // state consistent with the resource pointer state. This is necessary because the
248  // Python API still uses the raw pointer API. Once the Python API is updated to use
249  // resource_ref, this call can be removed.
250  if (new_mr != nullptr) { detail::set_per_device_resource_ref_unsafe(device_id, new_mr); }
251 
252  auto& map = detail::get_map();
253  auto const old_itr = map.find(device_id.value());
254  // If a resource didn't previously exist for `id`, return pointer to initial_resource
255  auto* old_mr = (old_itr == map.end()) ? detail::initial_resource() : old_itr->second;
256  map[device_id.value()] = (new_mr == nullptr) ? detail::initial_resource() : new_mr;
257  return old_mr;
258 }
259 
282 {
284 }
285 
311 {
313 }
314 
338 {
339  using any_device_resource = cuda::mr::any_resource<cuda::mr::device_accessible>;
340  std::lock_guard<std::mutex> lock{detail::ref_map_lock()};
341  auto& map = detail::get_ref_map();
342  // If a resource was never set for `id`, set to the initial resource
343  auto const found = map.find(device_id.value());
344  if (found == map.end()) {
345  // Create a resource_ref from the initial resource, then reify it to any_resource
346  device_async_resource_ref initial_ref{*detail::initial_resource()};
347  auto item = map.emplace(device_id.value(), static_cast<any_device_resource>(initial_ref));
348  return device_async_resource_ref{item.first->second};
349  }
350  return device_async_resource_ref{found->second};
351 }
352 
378  cuda_device_id device_id, device_async_resource_ref new_resource_ref)
379 {
380  std::lock_guard<std::mutex> lock{detail::ref_map_lock()};
381  return detail::set_per_device_resource_ref_unsafe(device_id, new_resource_ref);
382 }
383 
407 {
409 }
410 
433  device_async_resource_ref new_resource_ref)
434 {
435  return set_per_device_resource_ref(rmm::get_current_cuda_device(), new_resource_ref);
436 }
437 
455 {
456  return set_per_device_resource_ref(device_id, *detail::initial_resource());
457 }
458 
473 {
475 } // end of group
477 } // namespace mr
478 } // namespace RMM_NAMESPACE
device_memory_resource derived class that uses cudaMalloc/Free for allocation/deallocation.
Definition: cuda_memory_resource.hpp:25
Base class for all librmm device memory allocation.
Definition: device_memory_resource.hpp:82
cuda_device_id get_current_cuda_device()
Returns a cuda_device_id for the current device.
device_async_resource_ref reset_per_device_resource_ref(cuda_device_id device_id)
Reset the device_async_resource_ref for the specified device to the initial resource.
Definition: per_device_resource.hpp:454
device_async_resource_ref set_current_device_resource_ref(device_async_resource_ref new_resource_ref)
Set the device_async_resource_ref for the current device.
Definition: per_device_resource.hpp:432
device_async_resource_ref reset_current_device_resource_ref()
Reset the device_async_resource_ref for the current device to the initial resource.
Definition: per_device_resource.hpp:472
device_async_resource_ref set_per_device_resource_ref(cuda_device_id device_id, device_async_resource_ref new_resource_ref)
Set the device_async_resource_ref for the specified device to new_resource_ref.
Definition: per_device_resource.hpp:377
device_memory_resource * set_current_device_resource(device_memory_resource *new_mr)
Set the memory resource for the current device.
Definition: per_device_resource.hpp:310
device_async_resource_ref get_current_device_resource_ref()
Get the device_async_resource_ref for the current device.
Definition: per_device_resource.hpp:406
device_async_resource_ref get_per_device_resource_ref(cuda_device_id device_id)
Get the device_async_resource_ref for the specified device.
Definition: per_device_resource.hpp:337
device_memory_resource * get_current_device_resource()
Get the memory resource for the current device.
Definition: per_device_resource.hpp:281
device_memory_resource * set_per_device_resource(cuda_device_id device_id, device_memory_resource *new_mr)
Set the device_memory_resource for the specified device.
Definition: per_device_resource.hpp:240
device_memory_resource * get_per_device_resource(cuda_device_id device_id)
Get the resource for the specified device.
Definition: per_device_resource.hpp:178
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:32
auto & get_map()
Reference to the map from device id -> resource.
Definition: per_device_resource.hpp:128
std::mutex & map_lock()
Reference to the lock.
Definition: per_device_resource.hpp:119
auto & get_ref_map()
Reference to the map from device id -> any_resource.
Definition: per_device_resource.hpp:147
device_memory_resource * initial_resource()
Returns a pointer to the initial resource.
Definition: per_device_resource.hpp:110
std::mutex & ref_map_lock()
Reference to the lock.
Definition: per_device_resource.hpp:137
Strong type for a CUDA device identifier.
Definition: cuda_device.hpp:27
constexpr value_type value() const noexcept
The wrapped integer value.
Definition: cuda_device.hpp:43