rmm_resource_adaptor_impl.hpp
1 
6 #pragma once
7 
8 #include <cstddef>
9 #include <cstdint>
10 #include <mutex>
11 #include <optional>
12 #include <stack>
13 #include <thread>
14 #include <unordered_map>
15 #include <unordered_set>
16 
17 #include <cuda/memory_resource>
18 
19 #include <rmm/aligned.hpp>
20 #include <rmm/error.hpp>
21 #include <rmm/resource_ref.hpp>
22 
23 #include <rapidsmpf/memory/scoped_memory_record.hpp>
24 
25 namespace rapidsmpf::detail {
26 
36  public:
/**
 * @brief Construct with primary and optional fallback memory resource.
 *
 * NOTE(review): the line that carries the constructor name and the opening
 * parenthesis is not present in this listing; the lines below are its
 * parameter list and the closing `);`.
 *
 * @param primary_mr Primary device-accessible memory resource, taken by value.
 * @param fallback_mr Optional fallback device-accessible memory resource,
 * taken by value; may be empty.
 */
44  cuda::mr::any_resource<cuda::mr::device_accessible> primary_mr,
45  std::optional<cuda::mr::any_resource<cuda::mr::device_accessible>> fallback_mr
46  );
47 
/// Destructor; explicitly defaulted, so members are destroyed by the
/// compiler-generated logic.
48  ~RmmResourceAdaptorImpl() = default;
49 
/// Copy assignment is deleted: an existing instance cannot be copy-assigned.
52  RmmResourceAdaptorImpl& operator=(RmmResourceAdaptorImpl const&) = delete;
/// Move assignment is deleted: an existing instance cannot be move-assigned.
53  RmmResourceAdaptorImpl& operator=(RmmResourceAdaptorImpl&&) = delete;
54 
/**
 * @brief Equality comparison (identity-based).
 *
 * @param other The adaptor implementation to compare against.
 * @return Result of the comparison. Presumably true only when `other` is this
 * very object, given the "identity-based" description; confirm against the
 * implementation, which is not part of this header.
 */
61  [[nodiscard]] bool operator==(RmmResourceAdaptorImpl const& other) const noexcept;
62 
/**
 * @brief Get a reference to the primary upstream resource.
 *
 * @return A non-owning `rmm::device_async_resource_ref`. Presumably it refers
 * to the `primary_mr_` member; confirm against the implementation.
 */
64  [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept;
65 
/**
 * @brief Get a reference to the fallback upstream resource.
 *
 * @return An optional `rmm::device_async_resource_ref`. Presumably empty when
 * no fallback resource was supplied at construction; confirm against the
 * implementation.
 */
67  [[nodiscard]] std::optional<rmm::device_async_resource_ref>
68  get_fallback_resource() const noexcept;
69 
/**
 * @brief Returns a copy of the main memory record.
 *
 * @return The record by value, so the caller's copy is independent of later
 * changes to the adaptor's own record.
 */
71  [[nodiscard]] ScopedMemoryRecord get_main_record() const;
72 
/**
 * @brief Get the total current allocated memory from both primary and fallback.
 *
 * @return The total as a signed 64-bit integer; presumably a byte count
 * (TODO confirm the unit against the implementation).
 */
74  [[nodiscard]] std::int64_t current_allocated() const noexcept;
75 
78 
81 
/**
 * @brief Allocate memory asynchronously on the given stream.
 *
 * Not declared `noexcept`; how failures are reported is decided by the
 * implementation, which is not part of this header.
 *
 * @param stream CUDA stream reference the allocation is issued on.
 * @param bytes Number of bytes to allocate.
 * @param alignment Requested alignment; defaults to
 * `rmm::CUDA_ALLOCATION_ALIGNMENT`.
 * @return Pointer to the allocated memory.
 */
90  void* allocate(
91  cuda::stream_ref stream,
92  std::size_t bytes,
93  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT
94  );
95 
/**
 * @brief Deallocate memory asynchronously on the given stream.
 *
 * NOTE(review): the declarator line is not present in this listing. The
 * parameter list below matches `void deallocate(cuda::stream_ref, void*,
 * std::size_t, std::size_t) noexcept`; confirm against the original header.
 *
 * @param stream CUDA stream reference the deallocation is issued on.
 * @param ptr Pointer to the memory to release.
 * @param bytes Size passed alongside `ptr`; presumably the size originally
 * requested for it (TODO confirm).
 * @param alignment Alignment passed alongside `ptr`; defaults to
 * `rmm::CUDA_ALLOCATION_ALIGNMENT`.
 */
105  cuda::stream_ref stream,
106  void* ptr,
107  std::size_t bytes,
108  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT
109  ) noexcept;
110 
/**
 * @brief Allocate memory synchronously.
 *
 * NOTE(review): the declarator line is not present in this listing. The
 * parameter list below matches `void* allocate_sync(std::size_t, std::size_t)`;
 * confirm against the original header.
 *
 * @param bytes Number of bytes to allocate.
 * @param alignment Requested alignment; defaults to
 * `rmm::CUDA_ALLOCATION_ALIGNMENT`.
 */
119  std::size_t bytes, std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT
120  );
121 
/**
 * @brief Deallocate memory synchronously.
 *
 * NOTE(review): the declarator line is not present in this listing. The
 * parameter list below matches `void deallocate_sync(void*, std::size_t,
 * std::size_t) noexcept`; confirm against the original header.
 *
 * @param ptr Pointer to the memory to release.
 * @param bytes Size passed alongside `ptr`; presumably the size originally
 * requested for it (TODO confirm).
 * @param alignment Alignment passed alongside `ptr`; defaults to
 * `rmm::CUDA_ALLOCATION_ALIGNMENT`.
 */
130  void* ptr,
131  std::size_t bytes,
132  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT
133  ) noexcept;
134 
/**
 * @brief Tag this resource as device-accessible for the CCCL concept.
 *
 * Friend function defined inline with an empty body and unnamed parameters:
 * it performs no work when called, so only its presence as an overload for
 * `cuda::mr::device_accessible` matters.
 */
136  friend void get_property(
137  RmmResourceAdaptorImpl const&, cuda::mr::device_accessible
138  ) noexcept {}
139 
140  private:
/// Mutex declared `mutable`, so it can also be locked from const member
/// functions. Presumably guards the bookkeeping members of this class
/// (TODO confirm which members it protects in the implementation).
141  mutable std::mutex mutex_;
/// Owning, type-erased handle to the primary device-accessible resource.
142  cuda::mr::any_resource<cuda::mr::device_accessible> primary_mr_;
/// Owning, type-erased handle to the fallback resource; empty when none is set.
143  std::optional<cuda::mr::any_resource<cuda::mr::device_accessible>> fallback_mr_;
/// Set of raw pointers. Presumably the allocations that were served by the
/// fallback resource, kept so they can be told apart on deallocation (TODO confirm).
144  std::unordered_set<void*> fallback_allocations_;
145 
/// Memory record held by the adaptor itself. Presumably the "main memory
/// record" that get_main_record() returns a copy of (TODO confirm).
146  ScopedMemoryRecord main_record_;
/// One stack of records per thread id. Presumably the per-thread scoped
/// records opened and closed by begin/end_scoped_memory_record (TODO confirm).
147  std::unordered_map<std::thread::id, std::stack<ScopedMemoryRecord>> record_stacks_;
/// Maps a raw pointer to a thread id. Presumably the thread that performed
/// the allocation of that pointer (TODO confirm).
148  std::unordered_map<void*, std::thread::id> allocating_threads_;
149 };
150 
151 } // namespace rapidsmpf::detail
Implementation class for RmmResourceAdaptor.
void begin_scoped_memory_record()
Begin recording a new scoped memory usage record for the current thread.
ScopedMemoryRecord get_main_record() const
Returns a copy of the main memory record.
std::int64_t current_allocated() const noexcept
Get the total current allocated memory from both primary and fallback.
void deallocate_sync(void *ptr, std::size_t bytes, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept
Deallocate memory synchronously.
friend void get_property(RmmResourceAdaptorImpl const &, cuda::mr::device_accessible) noexcept
Tag this resource as device-accessible for the CCCL concept.
void * allocate(cuda::stream_ref stream, std::size_t bytes, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT)
Allocate memory asynchronously on the given stream.
bool operator==(RmmResourceAdaptorImpl const &other) const noexcept
Equality comparison (identity-based).
void deallocate(cuda::stream_ref stream, void *ptr, std::size_t bytes, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept
Deallocate memory asynchronously on the given stream.
std::optional< rmm::device_async_resource_ref > get_fallback_resource() const noexcept
Get a reference to the fallback upstream resource.
rmm::device_async_resource_ref get_upstream_resource() const noexcept
Get a reference to the primary upstream resource.
void * allocate_sync(std::size_t bytes, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT)
Allocate memory synchronously.
RmmResourceAdaptorImpl(cuda::mr::any_resource< cuda::mr::device_accessible > primary_mr, std::optional< cuda::mr::any_resource< cuda::mr::device_accessible >> fallback_mr)
Construct with primary and optional fallback memory resource.
ScopedMemoryRecord end_scoped_memory_record()
End the current scoped memory record and return it.
cuda::mr::resource_ref< cuda::mr::device_accessible > device_async_resource_ref
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
Memory statistics for a specific scope.