All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
per_device_resource.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <rmm/cuda_device.hpp>
20 #include <rmm/detail/export.hpp>
23 #include <rmm/resource_ref.hpp>
24 
25 #include <map>
26 #include <mutex>
27 
94 namespace RMM_NAMESPACE {
95 namespace mr {
101 namespace detail {
102 
103 // These symbols must have default visibility so that when they are
104 // referenced in multiple different DSOs the linker correctly
105 // determines that there is only a single unique reference to the
106 // function symbols (and hence they return unique static references
107 // across different DSOs). See also
108 // https://github.com/rapidsai/rmm/issues/826
109 // Although currently the entire RMM namespace is RMM_EXPORT, we
110 // explicitly mark these functions as exported in case the namespace
111 // export changes.
120 {
 // Function-local static: the cuda_memory_resource is constructed the first time this
 // body runs, and every call returns the address of that same object.
121  static cuda_memory_resource mr{};
122  return &mr;
123 }
124 
/**
 * @brief Reference to the mutex used around the device id -> resource pointer map.
 *
 * The mutex is a function-local static, so every call returns the same object. In this
 * file it is locked by the `device_memory_resource*` getter and setter before they access
 * `get_map()`.
 *
 * @return Reference to the static `std::mutex`.
 */
128 RMM_EXPORT inline std::mutex& map_lock()
129 {
130  static std::mutex map_lock;
131  return map_lock;
132 }
133 
/**
 * @brief Reference to the map from device id -> resource pointer.
 *
 * The map is a function-local static keyed by `cuda_device_id::value_type`, so every call
 * returns the same container. This function takes no lock; the callers in this file lock
 * `map_lock()` before using the returned reference.
 *
 * @return Reference to the static `std::map` of `device_memory_resource*`.
 */
137 RMM_EXPORT inline auto& get_map()
138 {
139  static std::map<cuda_device_id::value_type, device_memory_resource*> device_id_to_resource;
140  return device_id_to_resource;
141 }
142 
/**
 * @brief Reference to the mutex used around the device id -> resource_ref map.
 *
 * The mutex is a function-local static, so every call returns the same object. In this
 * file it is locked by `get_per_device_resource_ref` and `set_per_device_resource_ref`
 * before they access `get_ref_map()`.
 *
 * @return Reference to the static `std::mutex`.
 */
146 RMM_EXPORT inline std::mutex& ref_map_lock()
147 {
148  static std::mutex ref_map_lock;
149  return ref_map_lock;
150 }
151 
152 // This symbol must have default visibility, see: https://github.com/rapidsai/rmm/issues/826
/**
 * @brief Reference to the map from device id -> resource_ref.
 *
 * The map is a function-local static keyed by `cuda_device_id::value_type`, so every call
 * returns the same container. This function takes no lock; synchronization is left to the
 * caller.
 *
 * @return Reference to the static `std::map` of `device_async_resource_ref`.
 */
156 RMM_EXPORT inline auto& get_ref_map()
157 {
158  static std::map<cuda_device_id::value_type, device_async_resource_ref> device_id_to_resource_ref;
159  return device_id_to_resource_ref;
160 }
161 
162 } // namespace detail
163 
187 {
 // Hold the pointer-map mutex for the whole lookup-or-insert below.
188  std::lock_guard<std::mutex> lock{detail::map_lock()};
189  auto& map = detail::get_map();
190  // If a resource was never set for `device_id`, set to the initial resource
191  auto const found = map.find(device_id.value());
 // No entry yet: store detail::initial_resource() under this device id and return it.
 // Otherwise return the pointer that is already recorded.
192  return (found == map.end()) ? (map[device_id.value()] = detail::initial_resource())
193  : found->second;
194 }
195 
196 namespace detail {
197 
198 // The non-thread-safe implementation of `set_per_device_resource_ref`. This exists because
199 // we need to call this function from two places: the thread-safe version of
200 // `set_per_device_resource_ref` and the thread-safe version of `set_per_device_resource`,
201 // both of which take the lock, so we need an implementation that doesn't take the lock.
203 inline device_async_resource_ref set_per_device_resource_ref_unsafe(
204  cuda_device_id device_id, device_async_resource_ref new_resource_ref)
205 {
206  auto& map = detail::get_ref_map();
207  auto const old_itr = map.find(device_id.value());
208  // If a resource didn't previously exist for `device_id`, return pointer to initial_resource
209  // Note: because resource_ref is not default-constructible, we can't use std::map::operator[]
210  if (old_itr == map.end()) {
211  map.insert({device_id.value(), new_resource_ref});
212  return device_async_resource_ref{detail::initial_resource()};
213  }
214 
215  auto old_resource_ref = old_itr->second;
216  old_itr->second = new_resource_ref; // update map directly via iterator
217  return old_resource_ref;
218 }
219 } // namespace detail
220 
249  device_memory_resource* new_mr)
250 {
251  std::lock_guard<std::mutex> lock{detail::map_lock()};
252 
253  // Note: even though set_per_device_resource() and set_per_device_resource_ref() are not
254  // interchangeable, we call the latter from the former to maintain resource_ref
255  // state consistent with the resource pointer state. This is necessary because the
256  // Python API still uses the raw pointer API. Once the Python API is updated to use
257  // resource_ref, this call can be removed.
258  detail::set_per_device_resource_ref_unsafe(device_id, new_mr);
259 
260  auto& map = detail::get_map();
261  auto const old_itr = map.find(device_id.value());
262  // If a resource didn't previously exist for `id`, return pointer to initial_resource
263  auto* old_mr = (old_itr == map.end()) ? detail::initial_resource() : old_itr->second;
264  map[device_id.value()] = (new_mr == nullptr) ? detail::initial_resource() : new_mr;
265  return old_mr;
266 }
267 
290 {
292 }
293 
319 {
321 }
322 
346 {
 // Hold the ref-map mutex for the whole lookup-or-insert below.
347  std::lock_guard<std::mutex> lock{detail::ref_map_lock()};
348  auto& map = detail::get_ref_map();
349  // If a resource was never set for `device_id`, set to the initial resource
350  auto const found = map.find(device_id.value());
351  if (found == map.end()) {
 // insert() returns {iterator, bool}; `.first` points at the entry that now holds a ref
 // to detail::initial_resource(). insert() is used rather than operator[] here.
352  auto item = map.insert({device_id.value(), detail::initial_resource()});
353  return item.first->second;
354  }
355  return found->second;
356 }
357 
383  cuda_device_id device_id, device_async_resource_ref new_resource_ref)
384 {
 // Take the ref-map mutex, then delegate to the non-locking implementation; its return
 // value (the previously stored ref, or a ref to the initial resource) is passed through.
385  std::lock_guard<std::mutex> lock{detail::ref_map_lock()};
386  return detail::set_per_device_resource_ref_unsafe(device_id, new_resource_ref);
387 }
388 
412 {
414 }
415 
438  device_async_resource_ref new_resource_ref)
439 {
 // Forward to the per-device setter, using the device reported by
 // rmm::get_current_cuda_device() as the target.
440  return set_per_device_resource_ref(rmm::get_current_cuda_device(), new_resource_ref);
441 }
442 
460 {
 // Resetting is implemented as setting the ref for `device_id` back to
 // detail::initial_resource(); whatever set_per_device_resource_ref returns is returned.
461  return set_per_device_resource_ref(device_id, detail::initial_resource());
462 }
463 
478 {
480 } // end of group
482 } // namespace mr
483 } // namespace RMM_NAMESPACE
device_memory_resource derived class that uses cudaMalloc/Free for allocation/deallocation.
Definition: cuda_memory_resource.hpp:36
Base class for all RMM device memory allocation.
Definition: device_memory_resource.hpp:94
cuda_device_id get_current_cuda_device()
Returns a cuda_device_id for the current device.
Definition: cuda_device.hpp:96
device_async_resource_ref reset_per_device_resource_ref(cuda_device_id device_id)
Reset the device_async_resource_ref for the specified device to the initial resource.
Definition: per_device_resource.hpp:459
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:41
device_async_resource_ref set_current_device_resource_ref(device_async_resource_ref new_resource_ref)
Set the device_async_resource_ref for the current device.
Definition: per_device_resource.hpp:437
device_async_resource_ref reset_current_device_resource_ref()
Reset the device_async_resource_ref for the current device to the initial resource.
Definition: per_device_resource.hpp:477
device_async_resource_ref set_per_device_resource_ref(cuda_device_id device_id, device_async_resource_ref new_resource_ref)
Set the device_async_resource_ref for the specified device to new_resource_ref
Definition: per_device_resource.hpp:382
device_memory_resource * set_current_device_resource(device_memory_resource *new_mr)
Set the memory resource for the current device.
Definition: per_device_resource.hpp:318
device_async_resource_ref get_current_device_resource_ref()
Get the device_async_resource_ref for the current device.
Definition: per_device_resource.hpp:411
device_async_resource_ref get_per_device_resource_ref(cuda_device_id device_id)
Get the device_async_resource_ref for the specified device.
Definition: per_device_resource.hpp:345
device_memory_resource * get_current_device_resource()
Get the memory resource for the current device.
Definition: per_device_resource.hpp:289
device_memory_resource * set_per_device_resource(cuda_device_id device_id, device_memory_resource *new_mr)
Set the device_memory_resource for the specified device.
Definition: per_device_resource.hpp:248
device_memory_resource * get_per_device_resource(cuda_device_id device_id)
Get the resource for the specified device.
Definition: per_device_resource.hpp:186
auto & get_map()
Reference to the map from device id -> resource.
Definition: per_device_resource.hpp:137
std::mutex & map_lock()
Reference to the lock.
Definition: per_device_resource.hpp:128
auto & get_ref_map()
Reference to the map from device id -> resource_ref.
Definition: per_device_resource.hpp:156
device_memory_resource * initial_resource()
Returns a pointer to the initial resource.
Definition: per_device_resource.hpp:119
std::mutex & ref_map_lock()
Reference to the lock.
Definition: per_device_resource.hpp:146
Strong type for a CUDA device identifier.
Definition: cuda_device.hpp:38
constexpr value_type value() const noexcept
The wrapped integer value.
Definition: cuda_device.hpp:54