tracking_resource_adaptor.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
7 #include <rmm/detail/error.hpp>
8 #include <rmm/detail/export.hpp>
9 #include <rmm/detail/stack_trace.hpp>
10 #include <rmm/logger.hpp>
13 #include <rmm/resource_ref.hpp>
14 
15 #include <atomic>
16 #include <cstddef>
17 #include <map>
18 #include <mutex>
19 #include <shared_mutex>
20 #include <sstream>
21 
22 namespace RMM_NAMESPACE {
23 namespace mr {
43 template <typename Upstream>
45  public:
46  using read_lock_t =
47  std::shared_lock<std::shared_mutex>;
48  using write_lock_t =
49  std::unique_lock<std::shared_mutex>;
56  struct allocation_info {
57  std::unique_ptr<rmm::detail::stack_trace> strace;
58  std::size_t allocation_size;
59 
60  allocation_info() = delete;
67  allocation_info(std::size_t size, bool capture_stack)
68  : strace{[&]() {
69  return capture_stack ? std::make_unique<rmm::detail::stack_trace>() : nullptr;
70  }()},
71  allocation_size{size} {};
72  };
73 
81  tracking_resource_adaptor(device_async_resource_ref upstream, bool capture_stacks = false)
82  : capture_stacks_{capture_stacks}, allocated_bytes_{0}, upstream_{upstream}
83  {
84  }
85 
95  tracking_resource_adaptor(Upstream* upstream, bool capture_stacks = false)
96  : capture_stacks_{capture_stacks},
97  allocated_bytes_{0},
98  upstream_{to_device_async_resource_ref_checked(upstream)}
99  {
100  }
101 
102  tracking_resource_adaptor() = delete;
103  ~tracking_resource_adaptor() override = default;
106  default;
107  tracking_resource_adaptor& operator=(tracking_resource_adaptor const&) = delete;
109  default;
110 
114  [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept
115  {
116  return upstream_;
117  }
118 
126  std::map<void*, allocation_info> const& get_outstanding_allocations() const noexcept
127  {
128  return allocations_;
129  }
130 
140  std::size_t get_allocated_bytes() const noexcept { return allocated_bytes_; }
141 
153  {
154  read_lock_t lock(mtx_);
155 
156  std::ostringstream oss;
157 
158  if (!allocations_.empty()) {
159  for (auto const& alloc : allocations_) {
160  oss << alloc.first << ": " << alloc.second.allocation_size << " B";
161  if (alloc.second.strace != nullptr) {
162  oss << " : callstack:" << std::endl << *alloc.second.strace;
163  }
164  oss << std::endl;
165  }
166  }
167 
168  return oss.str();
169  }
170 
176  {
177 #if RMM_LOG_ACTIVE_LEVEL <= RMM_LOG_LEVEL_DEBUG
178  RMM_LOG_DEBUG("Outstanding Allocations: %s", get_outstanding_allocations_str());
179 #endif // RMM_LOG_ACTIVE_LEVEL <= RMM_LOG_LEVEL_DEBUG
180  }
181 
182  private:
196  void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
197  {
198  void* ptr = get_upstream_resource().allocate(stream, bytes);
199  // track it.
200  {
201  write_lock_t lock(mtx_);
202  allocations_.emplace(ptr, allocation_info{bytes, capture_stacks_});
203  }
204  allocated_bytes_ += bytes;
205 
206  return ptr;
207  }
208 
216  void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
217  {
218  get_upstream_resource().deallocate(stream, ptr, bytes);
219  {
220  write_lock_t lock(mtx_);
221 
222  const auto found = allocations_.find(ptr);
223 
224  // Ensure the allocation is found and the number of bytes match
225  if (found == allocations_.end()) {
226  // Don't throw but log an error. Throwing in a destructor (or any noexcept) will call
227  // std::terminate
228  RMM_LOG_ERROR(
229  "Deallocating a pointer that was not tracked. Ptr: %p [%zuB], Current Num. Allocations: "
230  "%zu",
231  ptr,
232  bytes,
233  this->allocations_.size());
234  } else {
235  auto const allocated_bytes = found->second.allocation_size;
236 
237  allocations_.erase(found);
238 
239  if (allocated_bytes != bytes) {
240  // Don't throw but log an error. Throwing in a destructor (or any noexcept) will call
241  // std::terminate
242  RMM_LOG_ERROR(
243  "Alloc bytes (%zu) and Dealloc bytes (%zu) do not match", allocated_bytes, bytes);
244 
245  bytes = allocated_bytes;
246  }
247  }
248  }
249  allocated_bytes_ -= bytes;
250  }
251 
259  bool do_is_equal(device_memory_resource const& other) const noexcept override
260  {
261  if (this == &other) { return true; }
262  auto cast = dynamic_cast<tracking_resource_adaptor<Upstream> const*>(&other);
263  if (cast == nullptr) { return false; }
264  return get_upstream_resource() == cast->get_upstream_resource();
265  }
266 
267  bool capture_stacks_; // whether a call stack is captured for each tracked allocation
268  std::map<void*, allocation_info> allocations_; // active allocations keyed by pointer; modified under mtx_
269  std::atomic<std::size_t> allocated_bytes_; // number of bytes currently allocated; atomic, updated outside the mtx_ critical section
270  std::shared_mutex mutable mtx_; // mutex for thread safe access to allocations_; mutable so const members can take a shared lock
271  device_async_resource_ref upstream_; // the upstream resource used for satisfying
272  // allocation and deallocation requests
273 };
274  // end of group
276 } // namespace mr
277 } // namespace RMM_NAMESPACE
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:28
Base class for all librmm device memory allocation.
Definition: device_memory_resource.hpp:83
Resource that uses Upstream to allocate memory and tracks allocations.
Definition: tracking_resource_adaptor.hpp:44
tracking_resource_adaptor(Upstream *upstream, bool capture_stacks=false)
Construct a new tracking resource adaptor using upstream to satisfy allocation requests.
Definition: tracking_resource_adaptor.hpp:95
tracking_resource_adaptor(device_async_resource_ref upstream, bool capture_stacks=false)
Construct a new tracking resource adaptor using upstream to satisfy allocation requests.
Definition: tracking_resource_adaptor.hpp:81
std::size_t get_allocated_bytes() const noexcept
Query the number of bytes that have been allocated. Note that this can not be used to know how large ...
Definition: tracking_resource_adaptor.hpp:140
std::unique_lock< std::shared_mutex > write_lock_t
Type of lock used to synchronize write access.
Definition: tracking_resource_adaptor.hpp:49
tracking_resource_adaptor(tracking_resource_adaptor &&) noexcept=default
Default move constructor.
std::string get_outstanding_allocations_str() const
Gets a string containing the outstanding allocation pointers, their size, and optionally the stack tr...
Definition: tracking_resource_adaptor.hpp:152
std::map< void *, allocation_info > const & get_outstanding_allocations() const noexcept
Get the outstanding allocations map.
Definition: tracking_resource_adaptor.hpp:126
void log_outstanding_allocations() const
Log any outstanding allocations via RMM_LOG_DEBUG.
Definition: tracking_resource_adaptor.hpp:175
std::shared_lock< std::shared_mutex > read_lock_t
Type of lock used to synchronize read access.
Definition: tracking_resource_adaptor.hpp:47
device_async_resource_ref to_device_async_resource_ref_checked(Resource *res)
Convert pointer to memory resource into device_async_resource_ref, checking for nullptr
Definition: resource_ref.hpp:72
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:32
RAPIDS Memory Manager - The top-level namespace for all RMM functionality.
Management of per-device device_memory_resources.
Information stored about an allocation. Includes the size and a stack trace if the tracking_resource_...
Definition: tracking_resource_adaptor.hpp:56
std::unique_ptr< rmm::detail::stack_trace > strace
Stack trace of the allocation.
Definition: tracking_resource_adaptor.hpp:57
std::size_t allocation_size
Size of the allocation.
Definition: tracking_resource_adaptor.hpp:58
allocation_info(std::size_t size, bool capture_stack)
Construct a new allocation info object.
Definition: tracking_resource_adaptor.hpp:67