All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
statistics_resource_adaptor.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <rmm/detail/export.hpp>
21 #include <rmm/resource_ref.hpp>
22 
23 #include <cstddef>
24 #include <mutex>
25 #include <shared_mutex>
26 #include <stack>
27 
28 namespace RMM_NAMESPACE {
29 namespace mr {
57 template <typename Upstream>
59  public:
/// Type of lock used to synchronize read access (shared ownership of the mutex).
using read_lock_t = std::shared_lock<std::shared_mutex>;
/// Type of lock used to synchronize write access (exclusive ownership of the mutex).
using write_lock_t = std::unique_lock<std::shared_mutex>;
/**
 * @brief Utility struct for counting the current, peak, and total value of a number.
 */
struct counter {
  int64_t value{0};  ///< Current value
  int64_t peak{0};   ///< Largest value that `value` has reached
  int64_t total{0};  ///< Sum of all added values

  /**
   * @brief Add `val` to the current value and to the total, and raise the peak if the
   * new current value exceeds it.
   *
   * @param val Amount to add
   * @return Reference to this counter
   */
  counter& operator+=(int64_t val)
  {
    value += val;
    total += val;
    peak = std::max(value, peak);
    return *this;
  }

  /**
   * @brief Subtract `val` from the current value.
   *
   * `peak` and `total` are left untouched.
   *
   * @param val Amount to subtract
   * @return Reference to this counter
   */
  counter& operator-=(int64_t val)
  {
    value -= val;
    return *this;
  }

  /**
   * @brief Fold the counters of a tracked sub-block into this counter.
   *
   * The peak is raised to at least `value + val.peak`, i.e. the sub-block's peak is
   * interpreted relative to this counter's value before the merge. After that the
   * sub-block's current value and total are added.
   *
   * @param val Counter of the sub-block to merge in
   */
  void add_counters_from_tracked_sub_block(const counter& val)
  {
    // The peak must be computed before `value` is updated: it uses the pre-merge value.
    peak = std::max(value + val.peak, peak);
    value += val.value;
    total += val.total;
  }
};
116 
123  statistics_resource_adaptor(device_async_resource_ref upstream) : upstream_{upstream} {}
124 
133  statistics_resource_adaptor(Upstream* upstream)
134  : upstream_{to_device_async_resource_ref_checked(upstream)}
135  {
136  }
137 
138  statistics_resource_adaptor() = delete;
139  ~statistics_resource_adaptor() override = default;
141  statistics_resource_adaptor& operator=(statistics_resource_adaptor const&) = delete;
143  default;
145  default;
146 
150  [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept
151  {
152  return upstream_;
153  }
154 
162  counter get_bytes_counter() const noexcept
163  {
164  read_lock_t lock(mtx_);
165 
166  return counter_stack_.top().first;
167  }
168 
177  {
178  read_lock_t lock(mtx_);
179 
180  return counter_stack_.top().second;
181  }
182 
190  std::pair<counter, counter> push_counters()
191  {
192  write_lock_t lock(mtx_);
193  auto ret = counter_stack_.top();
194  counter_stack_.push({counter{}, counter{}});
195  return ret;
196  }
197 
205  std::pair<counter, counter> pop_counters()
206  {
207  write_lock_t lock(mtx_);
208  if (counter_stack_.size() < 2) { throw std::out_of_range("cannot pop the last counter pair"); }
209  auto ret = counter_stack_.top();
210  counter_stack_.pop();
211  // Update the new top pair of counters
212  counter_stack_.top().first.add_counters_from_tracked_sub_block(ret.first);
213  counter_stack_.top().second.add_counters_from_tracked_sub_block(ret.second);
214  return ret;
215  }
216 
217  private:
231  void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
232  {
233  void* ptr = get_upstream_resource().allocate_async(bytes, stream);
234 
235  // increment the stats
236  {
237  write_lock_t lock(mtx_);
238 
239  // Increment the allocation_count_ while we have the lock
240  counter_stack_.top().first += bytes;
241  counter_stack_.top().second += 1;
242  }
243 
244  return ptr;
245  }
246 
254  void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override
255  {
256  get_upstream_resource().deallocate_async(ptr, bytes, stream);
257 
258  {
259  write_lock_t lock(mtx_);
260 
261  // Decrement the current allocated counts.
262  counter_stack_.top().first -= bytes;
263  counter_stack_.top().second -= 1;
264  }
265  }
266 
274  bool do_is_equal(device_memory_resource const& other) const noexcept override
275  {
276  if (this == &other) { return true; }
277  auto cast = dynamic_cast<statistics_resource_adaptor<Upstream> const*>(&other);
278  if (cast == nullptr) { return false; }
279  return get_upstream_resource() == cast->get_upstream_resource();
280  }
281 
282  // Stack of counter pairs <bytes, allocations>
283  // Invariant: the stack always contains at least one entry
284  std::stack<std::pair<counter, counter>> counter_stack_{{std::make_pair(counter{}, counter{})}};
285  std::shared_mutex mutable mtx_; // mutex for thread safe access to allocations_
286  // the upstream resource used for satisfying allocation requests
287  device_async_resource_ref upstream_;
288 };
289  // end of group
291 } // namespace mr
292 } // namespace RMM_NAMESPACE
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:39
Base class for all librmm device memory allocation.
Definition: device_memory_resource.hpp:93
Resource that uses Upstream to allocate memory and tracks statistics on memory allocations.
Definition: statistics_resource_adaptor.hpp:58
statistics_resource_adaptor(device_async_resource_ref upstream)
Construct a new statistics resource adaptor using upstream to satisfy allocation requests.
Definition: statistics_resource_adaptor.hpp:123
statistics_resource_adaptor(statistics_resource_adaptor &&) noexcept=default
Default move constructor.
std::pair< counter, counter > push_counters()
Push a pair of zero counters on the stack, which becomes the new counters returned by get_bytes_counter() and get_allocations_counter().
Definition: statistics_resource_adaptor.hpp:190
std::unique_lock< std::shared_mutex > write_lock_t
Type of lock used to synchronize write access.
Definition: statistics_resource_adaptor.hpp:63
statistics_resource_adaptor(Upstream *upstream)
Construct a new statistics resource adaptor using upstream to satisfy allocation requests.
Definition: statistics_resource_adaptor.hpp:133
std::shared_lock< std::shared_mutex > read_lock_t
Type of lock used to synchronize read access.
Definition: statistics_resource_adaptor.hpp:61
std::pair< counter, counter > pop_counters()
Pop a pair of counters from the stack.
Definition: statistics_resource_adaptor.hpp:205
counter get_allocations_counter() const noexcept
Returns a counter struct for this adaptor containing the current, peak, and total number of allocations.
Definition: statistics_resource_adaptor.hpp:176
counter get_bytes_counter() const noexcept
Returns a counter struct for this adaptor containing the current, peak, and total number of allocated bytes.
Definition: statistics_resource_adaptor.hpp:162
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:41
device_async_resource_ref to_device_async_resource_ref_checked(Resource *res)
Convert pointer to memory resource into device_async_resource_ref, checking for nullptr
Definition: resource_ref.hpp:79
Management of per-device device_memory_resources.
Utility struct for counting the current, peak, and total value of a number.
Definition: statistics_resource_adaptor.hpp:67
counter & operator-=(int64_t val)
Subtract val from the current value; the peak and total values are not modified.
Definition: statistics_resource_adaptor.hpp:92
int64_t value
Current value.
Definition: statistics_resource_adaptor.hpp:68
int64_t peak
Maximum that value has reached.
Definition: statistics_resource_adaptor.hpp:69
void add_counters_from_tracked_sub_block(const counter &val)
Merge the counters of a tracked sub-block: add its current value and total to this counter, and update the peak value if necessary.
Definition: statistics_resource_adaptor.hpp:109
counter & operator+=(int64_t val)
Add val to the current value and to the total, and update the peak value if necessary.
Definition: statistics_resource_adaptor.hpp:78
int64_t total
Sum of all added values.
Definition: statistics_resource_adaptor.hpp:70