statistics_resource_adaptor.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
7 #include <rmm/detail/export.hpp>
10 #include <rmm/resource_ref.hpp>
11 
12 #include <cstddef>
13 #include <memory>
14 #include <mutex>
15 #include <shared_mutex>
16 #include <stack>
17 
18 namespace RMM_NAMESPACE {
19 namespace mr {
47 template <typename Upstream>
49  public:
50  using read_lock_t =
51  std::shared_lock<std::shared_mutex>;
52  using write_lock_t =
53  std::unique_lock<std::shared_mutex>;
/**
 * @brief Utility struct for counting the current, peak, and total value of a number.
 */
struct counter {
  int64_t value{0};  ///< Current value
  int64_t peak{0};   ///< Highest value that `value` has reached
  int64_t total{0};  ///< Sum of all added values

  /**
   * @brief Add `val` to the current value and to the running total, raising the peak if the
   * new current value exceeds it.
   *
   * @param val Amount to add
   * @return Reference to this counter
   */
  counter& operator+=(int64_t val)
  {
    value += val;
    total += val;
    peak = std::max(value, peak);
    return *this;
  }

  /**
   * @brief Subtract `val` from the current value.
   *
   * `peak` and `total` are left unchanged.
   *
   * @param val Amount to subtract
   * @return Reference to this counter
   */
  counter& operator-=(int64_t val)
  {
    value -= val;
    return *this;
  }

  /**
   * @brief Fold the counters of a tracked sub-block into this counter.
   *
   * The sub-block's `value` and `total` are added to this counter's, and `peak` is raised when
   * this counter's current value plus the sub-block's peak exceeds it.
   *
   * @param val Counter of the tracked sub-block
   */
  void add_counters_from_tracked_sub_block(const counter& val)
  {
    // Evaluate the peak first: the sub-block's peak has to be offset by the value this counter
    // holds *before* the sub-block's own value is merged in.
    peak = std::max(value + val.peak, peak);
    value += val.value;
    total += val.total;
  }
};
106 
113  statistics_resource_adaptor(device_async_resource_ref upstream) : upstream_{upstream} {}
114 
123  statistics_resource_adaptor(Upstream* upstream)
124  : upstream_{to_device_async_resource_ref_checked(upstream)}
125  {
126  }
127 
// Default construction is disabled: both available constructors take an upstream resource.
128  statistics_resource_adaptor() = delete;
// Compiler-generated destructor, overriding the base-class destructor.
129  ~statistics_resource_adaptor() override = default;
// Copy assignment is disabled.
131  statistics_resource_adaptor& operator=(statistics_resource_adaptor const&) = delete;
133  default;
135  default;
136 
140  [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept
141  {
142  return upstream_;
143  }
144 
152  counter get_bytes_counter() const noexcept
153  {
154  read_lock_t lock(mtx_);
155 
156  return counter_stack_.top().first;
157  }
158 
167  {
168  read_lock_t lock(mtx_);
169 
170  return counter_stack_.top().second;
171  }
172 
180  std::pair<counter, counter> push_counters()
181  {
182  write_lock_t lock(mtx_);
183  auto ret = counter_stack_.top();
184  counter_stack_.push({counter{}, counter{}});
185  return ret;
186  }
187 
195  std::pair<counter, counter> pop_counters()
196  {
197  write_lock_t lock(mtx_);
198  if (counter_stack_.size() < 2) { throw std::out_of_range("cannot pop the last counter pair"); }
199  auto ret = counter_stack_.top();
200  counter_stack_.pop();
201  // Update the new top pair of counters
202  counter_stack_.top().first.add_counters_from_tracked_sub_block(ret.first);
203  counter_stack_.top().second.add_counters_from_tracked_sub_block(ret.second);
204  return ret;
205  }
206 
207  private:
221  void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
222  {
223  void* ptr = get_upstream_resource().allocate(stream, bytes);
224 
225  // increment the stats
226  {
227  write_lock_t lock(mtx_);
228 
229  // Increment the allocation_count_ while we have the lock
230  counter_stack_.top().first += bytes;
231  counter_stack_.top().second += 1;
232  }
233 
234  return ptr;
235  }
236 
244  void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
245  {
246  get_upstream_resource().deallocate(stream, ptr, bytes);
247 
248  {
249  write_lock_t lock(mtx_);
250 
251  // Decrement the current allocated counts.
252  counter_stack_.top().first -= bytes;
253  counter_stack_.top().second -= 1;
254  }
255  }
256 
264  bool do_is_equal(device_memory_resource const& other) const noexcept override
265  {
266  if (this == std::addressof(other)) { return true; }
267  auto cast = dynamic_cast<statistics_resource_adaptor<Upstream> const*>(&other);
268  if (cast == nullptr) { return false; }
269  return get_upstream_resource() == cast->get_upstream_resource();
270  }
271 
272  // Stack of counter pairs <bytes, allocations>; do_allocate/do_deallocate update the top pair
273  // Invariant: the stack always contains at least one entry
274  std::stack<std::pair<counter, counter>> counter_stack_{{std::make_pair(counter{}, counter{})}};
275  std::shared_mutex mutable mtx_; // guards access to counter_stack_ (shared for reads, exclusive for writes)
276  // the upstream resource used for satisfying allocation requests
277  device_async_resource_ref upstream_;
278 };
279  // end of group
281 } // namespace mr
282 } // namespace RMM_NAMESPACE
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:28
Base class for all librmm device memory allocation.
Definition: device_memory_resource.hpp:83
Resource that uses Upstream to allocate memory and tracks statistics on memory allocations.
Definition: statistics_resource_adaptor.hpp:48
statistics_resource_adaptor(device_async_resource_ref upstream)
Construct a new statistics resource adaptor using upstream to satisfy allocation requests.
Definition: statistics_resource_adaptor.hpp:113
statistics_resource_adaptor(statistics_resource_adaptor &&) noexcept=default
Default move constructor.
std::pair< counter, counter > push_counters()
Push a pair of zero counters on the stack, which becomes the new counters returned by get_bytes_counter() and get_allocations_counter().
Definition: statistics_resource_adaptor.hpp:180
std::unique_lock< std::shared_mutex > write_lock_t
Type of lock used to synchronize write access.
Definition: statistics_resource_adaptor.hpp:53
statistics_resource_adaptor(Upstream *upstream)
Construct a new statistics resource adaptor using upstream to satisfy allocation requests.
Definition: statistics_resource_adaptor.hpp:123
std::shared_lock< std::shared_mutex > read_lock_t
Type of lock used to synchronize read access.
Definition: statistics_resource_adaptor.hpp:51
std::pair< counter, counter > pop_counters()
Pop a pair of counters from the stack.
Definition: statistics_resource_adaptor.hpp:195
counter get_allocations_counter() const noexcept
Returns a counter struct for this adaptor containing the current, peak, and total number of allocations.
Definition: statistics_resource_adaptor.hpp:166
counter get_bytes_counter() const noexcept
Returns a counter struct for this adaptor containing the current, peak, and total number of allocated bytes.
Definition: statistics_resource_adaptor.hpp:152
device_async_resource_ref to_device_async_resource_ref_checked(Resource *res)
Convert pointer to memory resource into device_async_resource_ref, checking for nullptr
Definition: resource_ref.hpp:72
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:32
RAPIDS Memory Manager - The top-level namespace for all RMM functionality.
Management of per-device device_memory_resources.
Utility struct for counting the current, peak, and total value of a number.
Definition: statistics_resource_adaptor.hpp:57
counter & operator-=(int64_t val)
Subtract val from the current value; the peak and total values are left unchanged.
Definition: statistics_resource_adaptor.hpp:82
int64_t value
Current value.
Definition: statistics_resource_adaptor.hpp:58
int64_t peak
Highest value that value has reached.
Definition: statistics_resource_adaptor.hpp:59
void add_counters_from_tracked_sub_block(const counter &val)
Merge the counters of a tracked sub-block into this counter: add its value and total, and raise the peak if the current value plus the sub-block's peak exceeds it.
Definition: statistics_resource_adaptor.hpp:99
counter & operator+=(int64_t val)
Add val to the current value and to the total, and update the peak value if necessary.
Definition: statistics_resource_adaptor.hpp:68
int64_t total
Sum of all added values.
Definition: statistics_resource_adaptor.hpp:60