rmm_resource_adaptor.hpp
1 
6 #pragma once
7 
8 #include <array>
9 #include <cstddef>
10 #include <mutex>
11 #include <optional>
12 #include <stack>
13 #include <thread>
14 #include <type_traits>
15 #include <unordered_map>
16 #include <unordered_set>
17 
18 #include <rmm/error.hpp>
20 #include <rmm/resource_ref.hpp>
21 
22 namespace rapidsmpf {
23 
/// Allocation source types.
31  enum class AllocType : std::size_t {
/// The primary allocator (first-choice allocator).
32  PRIMARY = 0,
/// The fallback allocator (used when the primary fails).
33  FALLBACK = 1,
/// Aggregated statistics from both primary and fallback allocators.
/// NOTE(review): AllocTypeArray holds only two slots, so ALL is presumably
/// never used as an array index -- confirm against the implementation.
34  ALL = 2
35  };
36 
/// Array type for storing per-allocator statistics (two 64-bit signed slots).
/// NOTE(review): presumably indexed by AllocType::PRIMARY (0) and
/// AllocType::FALLBACK (1) -- confirm against the implementation.
38  using AllocTypeArray = std::array<std::int64_t, 2>;
39 
/// Returns the total number of allocations performed by the specified
/// allocator type.
///
/// @param alloc_type Allocator type to query; defaults to AllocType::ALL.
/// @return The allocation count as a signed 64-bit integer.
48  [[nodiscard]] std::int64_t num_total_allocs(
49  AllocType alloc_type = AllocType::ALL
50  ) const noexcept;
51 
/// Returns the number of currently active (non-deallocated) allocations for
/// the specified allocator type.
///
/// @param alloc_type Allocator type to query; defaults to AllocType::ALL.
/// @return The active allocation count as a signed 64-bit integer.
62  [[nodiscard]] std::int64_t num_current_allocs(
63  AllocType alloc_type = AllocType::ALL
64  ) const noexcept;
65 
/// Returns the current memory usage in bytes for the specified allocator type.
///
/// @param alloc_type Allocator type to query; defaults to AllocType::ALL.
/// @return The current usage in bytes.
75  [[nodiscard]] std::int64_t current(
76  AllocType alloc_type = AllocType::ALL
77  ) const noexcept;
78 
/// Returns the total number of bytes allocated.
///
/// @param alloc_type Allocator type to query; defaults to AllocType::ALL.
/// @return The byte total as a signed 64-bit integer.
/// NOTE(review): presumably a running sum that deallocations do not reduce --
/// confirm against the implementation.
88  [[nodiscard]] std::int64_t total(
89  AllocType alloc_type = AllocType::ALL
90  ) const noexcept;
91 
/// Returns the peak memory usage (in bytes) for the specified allocator type.
///
/// @param alloc_type Allocator type to query; defaults to AllocType::ALL.
/// @return The peak usage in bytes.
105  [[nodiscard]] std::int64_t peak(AllocType alloc_type = AllocType::ALL) const noexcept;
106 
/// Records a memory allocation event.
///
/// @param alloc_type Allocator type the event is attributed to.
/// @param nbytes Size associated with the event (presumably in bytes, per the
/// parameter name -- confirm).
/// NOTE(review): not declared noexcept, unlike the query methods; whether
/// AllocType::ALL is a valid argument here is not visible -- confirm.
118  void record_allocation(AllocType alloc_type, std::int64_t nbytes);
119 
/// Records a memory deallocation event.
///
/// @param alloc_type Allocator type the event is attributed to.
/// @param nbytes Size associated with the event (presumably in bytes, per the
/// parameter name -- confirm).
/// NOTE(review): whether AllocType::ALL is a valid argument here is not
/// visible -- confirm against the implementation.
130  void record_deallocation(AllocType alloc_type, std::int64_t nbytes);
131 
154 
171 
172  private:
// Per-allocator counters, all zero-initialized. The names mirror the public
// accessors (num_current_allocs, num_total_allocs, current, total, peak).
// NOTE(review): presumably slot 0 is PRIMARY and slot 1 is FALLBACK -- confirm.
173  AllocTypeArray num_current_allocs_{{0, 0}};
174  AllocTypeArray num_total_allocs_{{0, 0}};
175  AllocTypeArray current_{{0, 0}};
176  AllocTypeArray total_{{0, 0}};
177  AllocTypeArray peak_{{0, 0}};
// Single scalar peak, zero-initialized.
// NOTE(review): presumably the combined peak reported for AllocType::ALL -- confirm.
178  std::int64_t highest_peak_{0};
179 };
180 
// Compile-time guard: the build fails with the message below if
// ScopedMemoryRecord ever stops being trivially copyable.
181 static_assert(
182  std::is_trivially_copyable_v<ScopedMemoryRecord>,
183  "ScopedMemoryRecord must be trivially copyable"
184 );
185 
194  public:
203  std::optional<rmm::device_async_resource_ref> fallback_mr = std::nullopt
204  )
205  : primary_mr_{primary_mr}, fallback_mr_{fallback_mr} {}
206 
/// Default construction is disabled; an adaptor must be given a primary
/// memory resource.
207  RmmResourceAdaptor() = delete;
/// Defaulted destructor overriding the base class's virtual destructor.
208  ~RmmResourceAdaptor() override = default;
209 
/// Get a reference to the primary upstream resource.
///
/// @return The resource reference stored in primary_mr_.
215  [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept {
216  return primary_mr_;
217  }
218 
/// Get a reference to the fallback upstream resource.
///
/// @return The optional stored in fallback_mr_; it is std::nullopt when no
/// fallback was supplied at construction (the constructor's default).
226  [[nodiscard]] std::optional<rmm::device_async_resource_ref>
227  get_fallback_resource() const noexcept {
228  return fallback_mr_;
229  }
230 
/// Returns a copy of the main memory record.
///
/// @return A ScopedMemoryRecord by value.
/// NOTE(review): not noexcept, unlike the other getters; presumably takes
/// mutex_ while copying -- confirm against the implementation.
238  [[nodiscard]] ScopedMemoryRecord get_main_record() const;
239 
/// Get the total current allocated memory from both primary and fallback.
///
/// @return The combined amount as a signed 64-bit integer (presumably bytes
/// -- confirm against the implementation).
245  [[nodiscard]] std::int64_t current_allocated() const noexcept;
246 
247 
259 
283 
284  private:
/// Allocation hook overriding the base-class virtual.
///
/// @param bytes Requested allocation size.
/// @param stream CUDA stream view passed through with the request.
/// @return Pointer to the allocated memory.
/// NOTE(review): presumably tries primary_mr_ first and falls back to
/// fallback_mr_ on failure, updating the records -- the body is not visible
/// here; confirm against the implementation.
298  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override;
299 
/// Deallocation hook overriding the base-class virtual; declared noexcept.
///
/// @param ptr Pointer to release.
/// @param bytes Size of the allocation being released.
/// @param stream CUDA stream view passed through with the request.
/// NOTE(review): presumably routes ptr back to whichever resource produced
/// it (see fallback_allocations_) -- the body is not visible here; confirm.
307  void do_deallocate(
308  void* ptr, std::size_t bytes, rmm::cuda_stream_view stream
309  ) noexcept override;
310 
/// Equality hook overriding the base-class virtual; declared noexcept.
///
/// @param other The memory resource to compare against.
/// @return Whether this resource is considered equal to `other`; the exact
/// criterion is defined in the implementation, which is not visible here.
319  [[nodiscard]] bool do_is_equal(
320  rmm::mr::device_memory_resource const& other
321  ) const noexcept override;
322 
// Declared mutable so that const member functions can lock it.
// NOTE(review): presumably guards all bookkeeping members below -- confirm.
323  mutable std::mutex mutex_;
// Primary upstream resource, returned by get_upstream_resource().
324  rmm::device_async_resource_ref primary_mr_;
// Optional fallback resource, returned by get_fallback_resource().
325  std::optional<rmm::device_async_resource_ref> fallback_mr_;
// NOTE(review): presumably the pointers that were served by fallback_mr_,
// so deallocation can be routed to the right resource -- confirm.
326  std::unordered_set<void*> fallback_allocations_;
327 
// The main memory record; get_main_record() returns a copy of it.
329  ScopedMemoryRecord main_record_;
// One stack of scoped records per thread id.
// NOTE(review): presumably pushed and popped by begin_scoped_memory_record()
// and end_scoped_memory_record() -- confirm.
331  std::unordered_map<std::thread::id, std::stack<ScopedMemoryRecord>> record_stacks_;
// Maps an allocation pointer to a thread id.
// NOTE(review): presumably the thread that performed the allocation -- confirm.
333  std::unordered_map<void*, std::thread::id> allocating_threads_;
334 };
335 
336 
337 } // namespace rapidsmpf
An RMM memory resource adaptor tailored to RapidsMPF.
RmmResourceAdaptor(rmm::device_async_resource_ref primary_mr, std::optional< rmm::device_async_resource_ref > fallback_mr=std::nullopt)
Construct with specified primary and optional fallback memory resource.
void begin_scoped_memory_record()
Begin recording a new scoped memory usage record for the current thread.
ScopedMemoryRecord get_main_record() const
Returns a copy of the main memory record.
std::int64_t current_allocated() const noexcept
Get the total current allocated memory from both primary and fallback.
rmm::device_async_resource_ref get_upstream_resource() const noexcept
Get a reference to the primary upstream resource.
std::optional< rmm::device_async_resource_ref > get_fallback_resource() const noexcept
Get a reference to the fallback upstream resource.
ScopedMemoryRecord end_scoped_memory_record()
End the current scoped memory record and return it.
device_memory_resource(device_memory_resource const &)=default
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
Memory statistics for a specific scope.
ScopedMemoryRecord & add_subscope(ScopedMemoryRecord const &subscope)
Merge the memory statistics of a subscope into this record.
void record_allocation(AllocType alloc_type, std::int64_t nbytes)
Records a memory allocation event.
std::array< std::int64_t, 2 > AllocTypeArray
Array type for storing per-allocator statistics.
std::int64_t peak(AllocType alloc_type=AllocType::ALL) const noexcept
Returns the peak memory usage (in bytes) for the specified allocator type.
void record_deallocation(AllocType alloc_type, std::int64_t nbytes)
Records a memory deallocation event.
AllocType
Allocation source types.
@ PRIMARY
The primary allocator (first-choice allocator).
@ ALL
Aggregated statistics from both primary and fallback allocators.
@ FALLBACK
The fallback allocator (used when the primary fails).
ScopedMemoryRecord & add_scope(ScopedMemoryRecord const &scope)
Merge the memory statistics of another scope into this one.
std::int64_t total(AllocType alloc_type=AllocType::ALL) const noexcept
Returns the total number of bytes allocated.
std::int64_t num_current_allocs(AllocType alloc_type=AllocType::ALL) const noexcept
Returns the number of currently active (non-deallocated) allocations for the specified allocator type.
std::int64_t current(AllocType alloc_type=AllocType::ALL) const noexcept
Returns the current memory usage in bytes for the specified allocator type.
std::int64_t num_total_allocs(AllocType alloc_type=AllocType::ALL) const noexcept
Returns the total number of allocations performed by the specified allocator type.