per_device_resource.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2024, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include <rmm/cuda_device.hpp>
9 #include <rmm/detail/export.hpp>
12 #include <rmm/resource_ref.hpp>
13 
14 #include <map>
15 #include <mutex>
16 
83 namespace RMM_NAMESPACE {
84 namespace mr {
90 namespace detail {
91 
92 // These symbols must have default visibility so that when they are
93 // referenced in multiple different DSOs the linker correctly
94 // determines that there is only a single unique reference to the
95 // function symbols (and hence they return unique static references
96 // across different DSOs). See also
97 // https://github.com/rapidsai/rmm/issues/826
98 // Although currently the entire RMM namespace is RMM_EXPORT, we
99 // explicitly mark these functions as exported in case the namespace
100 // export changes.
109 {
  // A single function-local static `cuda_memory_resource` backs the "initial resource";
  // its address is what every call hands back.
110  static cuda_memory_resource mr{};
111  return &mr;
112 }
113 
117 RMM_EXPORT inline std::mutex& map_lock()
118 {
119  static std::mutex map_lock;
120  return map_lock;
121 }
122 
126 RMM_EXPORT inline auto& get_map()
127 {
128  static std::map<cuda_device_id::value_type, device_memory_resource*> device_id_to_resource;
129  return device_id_to_resource;
130 }
131 
135 RMM_EXPORT inline std::mutex& ref_map_lock()
136 {
137  static std::mutex ref_map_lock;
138  return ref_map_lock;
139 }
140 
141 // This symbol must have default visibility, see: https://github.com/rapidsai/rmm/issues/826
145 RMM_EXPORT inline auto& get_ref_map()
146 {
147  static std::map<cuda_device_id::value_type, device_async_resource_ref> device_id_to_resource_ref;
148  return device_id_to_resource_ref;
149 }
150 
151 } // namespace detail
152 
176 {
  // Hold the pointer-map mutex for the whole lookup and the possible insertion below.
177  std::lock_guard<std::mutex> lock{detail::map_lock()};
178  auto& map = detail::get_map();
179  // If a resource was never set for `device_id`, record the initial resource for it
180  auto const found = map.find(device_id.value());
  // Not found: store `detail::initial_resource()` under this device id and return it.
  // Found: return the pointer already stored in the map.
181  return (found == map.end()) ? (map[device_id.value()] = detail::initial_resource())
182  : found->second;
183 }
184 
185 namespace detail {
186 
187 // The non-thread-safe implementation of `set_per_device_resource_ref`. This exists because
188 // we need to call this function from two places: the thread-safe version of
189 // `set_per_device_resource_ref` and the thread-safe version of `set_per_device_resource`,
190 // both of which take the lock, so we need an implementation that doesn't take the lock.
192 inline device_async_resource_ref set_per_device_resource_ref_unsafe(
193  cuda_device_id device_id, device_async_resource_ref new_resource_ref)
194 {
195  auto& map = detail::get_ref_map();
196  auto const old_itr = map.find(device_id.value());
197  // If a resource didn't previously exist for `device_id`, return pointer to initial_resource
198  // Note: because resource_ref is not default-constructible, we can't use std::map::operator[]
199  if (old_itr == map.end()) {
200  map.insert({device_id.value(), new_resource_ref});
201  return device_async_resource_ref{detail::initial_resource()};
202  }
203 
204  auto old_resource_ref = old_itr->second;
205  old_itr->second = new_resource_ref; // update map directly via iterator
206  return old_resource_ref;
207 }
208 } // namespace detail
209 
238  device_memory_resource* new_mr)
239 {
240  std::lock_guard<std::mutex> lock{detail::map_lock()};
241 
242  // Note: even though set_per_device_resource() and set_per_device_resource_ref() are not
243  // interchangeable, we call the latter from the former to maintain resource_ref
244  // state consistent with the resource pointer state. This is necessary because the
245  // Python API still uses the raw pointer API. Once the Python API is updated to use
246  // resource_ref, this call can be removed.
247  detail::set_per_device_resource_ref_unsafe(device_id, new_mr);
248 
249  auto& map = detail::get_map();
250  auto const old_itr = map.find(device_id.value());
251  // If a resource didn't previously exist for `id`, return pointer to initial_resource
252  auto* old_mr = (old_itr == map.end()) ? detail::initial_resource() : old_itr->second;
253  map[device_id.value()] = (new_mr == nullptr) ? detail::initial_resource() : new_mr;
254  return old_mr;
255 }
256 
279 {
281 }
282 
308 {
310 }
311 
335 {
  // Hold the resource_ref-map mutex for the whole lookup and the possible insertion below.
336  std::lock_guard<std::mutex> lock{detail::ref_map_lock()};
337  auto& map = detail::get_ref_map();
338  // If a resource was never set for `device_id`, record the initial resource for it
339  auto const found = map.find(device_id.value());
340  if (found == map.end()) {
  // insert() is used to add the entry; the returned pair's iterator (`item.first`) points
  // at the stored element, whose mapped value is returned.
341  auto item = map.insert({device_id.value(), detail::initial_resource()});
342  return item.first->second;
343  }
  // An entry already exists for this device: return the stored resource_ref.
344  return found->second;
345 }
346 
372  cuda_device_id device_id, device_async_resource_ref new_resource_ref)
373 {
  // Take the resource_ref-map mutex, then delegate to the non-locking implementation,
  // whose return value (the previously stored resource_ref) is passed straight through.
374  std::lock_guard<std::mutex> lock{detail::ref_map_lock()};
375  return detail::set_per_device_resource_ref_unsafe(device_id, new_resource_ref);
376 }
377 
401 {
403 }
404 
427  device_async_resource_ref new_resource_ref)
428 {
  // Forward to the per-device setter, using the device reported by
  // `rmm::get_current_cuda_device()` as the target.
429  return set_per_device_resource_ref(rmm::get_current_cuda_device(), new_resource_ref);
430 }
431 
449 {
  // "Reset" is implemented as setting the resource_ref for `device_id` back to
  // `detail::initial_resource()`; the setter's return value is passed through.
450  return set_per_device_resource_ref(device_id, detail::initial_resource());
451 }
452 
467 {
469 } // end of group
471 } // namespace mr
472 } // namespace RMM_NAMESPACE
device_memory_resource derived class that uses cudaMalloc/Free for allocation/deallocation.
Definition: cuda_memory_resource.hpp:25
Base class for all librmm device memory allocation.
Definition: device_memory_resource.hpp:83
cuda_device_id get_current_cuda_device()
Returns a cuda_device_id for the current device.
device_async_resource_ref reset_per_device_resource_ref(cuda_device_id device_id)
Reset the device_async_resource_ref for the specified device to the initial resource.
Definition: per_device_resource.hpp:448
device_async_resource_ref set_current_device_resource_ref(device_async_resource_ref new_resource_ref)
Set the device_async_resource_ref for the current device.
Definition: per_device_resource.hpp:426
device_async_resource_ref reset_current_device_resource_ref()
Reset the device_async_resource_ref for the current device to the initial resource.
Definition: per_device_resource.hpp:466
device_async_resource_ref set_per_device_resource_ref(cuda_device_id device_id, device_async_resource_ref new_resource_ref)
Set the device_async_resource_ref for the specified device to new_resource_ref
Definition: per_device_resource.hpp:371
device_memory_resource * set_current_device_resource(device_memory_resource *new_mr)
Set the memory resource for the current device.
Definition: per_device_resource.hpp:307
device_async_resource_ref get_current_device_resource_ref()
Get the device_async_resource_ref for the current device.
Definition: per_device_resource.hpp:400
device_async_resource_ref get_per_device_resource_ref(cuda_device_id device_id)
Get the device_async_resource_ref for the specified device.
Definition: per_device_resource.hpp:334
device_memory_resource * get_current_device_resource()
Get the memory resource for the current device.
Definition: per_device_resource.hpp:278
device_memory_resource * set_per_device_resource(cuda_device_id device_id, device_memory_resource *new_mr)
Set the device_memory_resource for the specified device.
Definition: per_device_resource.hpp:237
device_memory_resource * get_per_device_resource(cuda_device_id device_id)
Get the resource for the specified device.
Definition: per_device_resource.hpp:175
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:32
auto & get_map()
Reference to the map from device id -> resource.
Definition: per_device_resource.hpp:126
std::mutex & map_lock()
Reference to the lock.
Definition: per_device_resource.hpp:117
auto & get_ref_map()
Reference to the map from device id -> resource_ref.
Definition: per_device_resource.hpp:145
device_memory_resource * initial_resource()
Returns a pointer to the initial resource.
Definition: per_device_resource.hpp:108
std::mutex & ref_map_lock()
Reference to the lock.
Definition: per_device_resource.hpp:135
Strong type for a CUDA device identifier.
Definition: cuda_device.hpp:27
constexpr value_type value() const noexcept
The wrapped integer value.
Definition: cuda_device.hpp:43