rapidsmpf: /__w/rapidsmpf/rapidsmpf/cpp/include/rapidsmpf/detail/rmm_resource_adaptor_impl.hpp Source File

 #pragma once


 #include <cstddef>

 #include <cstdint>

 #include <mutex>

 #include <optional>

 #include <stack>

 #include <thread>

 #include <unordered_map>

 #include <unordered_set>


 #include <cuda/memory_resource>


 #include <rmm/aligned.hpp>

 #include <rmm/error.hpp>

 #include <rmm/resource_ref.hpp>


 #include <rapidsmpf/memory/scoped_memory_record.hpp>


 namespace rapidsmpf::detail {


 class RmmResourceAdaptorImpl {

   public:

     RmmResourceAdaptorImpl(

         cuda::mr::any_resource<cuda::mr::device_accessible> primary_mr,

         std::optional<cuda::mr::any_resource<cuda::mr::device_accessible>> fallback_mr

     );


     ~RmmResourceAdaptorImpl() = default;


     RmmResourceAdaptorImpl(RmmResourceAdaptorImpl const&) = delete;

     RmmResourceAdaptorImpl(RmmResourceAdaptorImpl&&) = delete;

     RmmResourceAdaptorImpl& operator=(RmmResourceAdaptorImpl const&) = delete;

     RmmResourceAdaptorImpl& operator=(RmmResourceAdaptorImpl&&) = delete;


     [[nodiscard]] bool operator==(RmmResourceAdaptorImpl const& other) const noexcept;


     [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept;


     [[nodiscard]] std::optional<rmm::device_async_resource_ref>

     get_fallback_resource() const noexcept;


     [[nodiscard]] ScopedMemoryRecord get_main_record() const;


     [[nodiscard]] std::int64_t current_allocated() const noexcept;


     void begin_scoped_memory_record();


     ScopedMemoryRecord end_scoped_memory_record();


     void* allocate(

         cuda::stream_ref stream,

         std::size_t bytes,

         std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT

     );


     void deallocate(

         cuda::stream_ref stream,

         void* ptr,

         std::size_t bytes,

         std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT

     ) noexcept;


     void* allocate_sync(

         std::size_t bytes, std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT

     );


     void deallocate_sync(

         void* ptr,

         std::size_t bytes,

         std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT

     ) noexcept;


     friend void get_property(

         RmmResourceAdaptorImpl const&, cuda::mr::device_accessible

     ) noexcept {}


   private:

     mutable std::mutex mutex_;

     cuda::mr::any_resource<cuda::mr::device_accessible> primary_mr_;

     std::optional<cuda::mr::any_resource<cuda::mr::device_accessible>> fallback_mr_;

     std::unordered_set<void*> fallback_allocations_;


     ScopedMemoryRecord main_record_;

     std::unordered_map<std::thread::id, std::stack<ScopedMemoryRecord>> record_stacks_;

     std::unordered_map<void*, std::thread::id> allocating_threads_;

 };


 }  // namespace rapidsmpf::detail

aligned.hpp

rapidsmpf::detail::RmmResourceAdaptorImpl
Implementation class for RmmResourceAdaptor.
Definition: rmm_resource_adaptor_impl.hpp:35

rapidsmpf::detail::RmmResourceAdaptorImpl::begin_scoped_memory_record
void begin_scoped_memory_record()
Begin recording a new scoped memory usage record for the current thread.

rapidsmpf::detail::RmmResourceAdaptorImpl::get_main_record
ScopedMemoryRecord get_main_record() const
Returns a copy of the main memory record.

rapidsmpf::detail::RmmResourceAdaptorImpl::current_allocated
std::int64_t current_allocated() const noexcept
Get the total current allocated memory from both primary and fallback.

rapidsmpf::detail::RmmResourceAdaptorImpl::deallocate_sync
void deallocate_sync(void *ptr, std::size_t bytes, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept
Deallocate memory synchronously.

rapidsmpf::detail::RmmResourceAdaptorImpl::get_property
friend void get_property(RmmResourceAdaptorImpl const &, cuda::mr::device_accessible) noexcept
Tag this resource as device-accessible for the CCCL concept.
Definition: rmm_resource_adaptor_impl.hpp:136

rapidsmpf::detail::RmmResourceAdaptorImpl::allocate
void * allocate(cuda::stream_ref stream, std::size_t bytes, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT)
Allocate memory asynchronously on the given stream.

rapidsmpf::detail::RmmResourceAdaptorImpl::operator==
bool operator==(RmmResourceAdaptorImpl const &other) const noexcept
Equality comparison (identity-based).

rapidsmpf::detail::RmmResourceAdaptorImpl::deallocate
void deallocate(cuda::stream_ref stream, void *ptr, std::size_t bytes, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT) noexcept
Deallocate memory asynchronously on the given stream.

rapidsmpf::detail::RmmResourceAdaptorImpl::get_fallback_resource
std::optional< rmm::device_async_resource_ref > get_fallback_resource() const noexcept
Get a reference to the fallback upstream resource.

rapidsmpf::detail::RmmResourceAdaptorImpl::get_upstream_resource
rmm::device_async_resource_ref get_upstream_resource() const noexcept
Get a reference to the primary upstream resource.

rapidsmpf::detail::RmmResourceAdaptorImpl::allocate_sync
void * allocate_sync(std::size_t bytes, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT)
Allocate memory synchronously.

rapidsmpf::detail::RmmResourceAdaptorImpl::RmmResourceAdaptorImpl
RmmResourceAdaptorImpl(cuda::mr::any_resource< cuda::mr::device_accessible > primary_mr, std::optional< cuda::mr::any_resource< cuda::mr::device_accessible >> fallback_mr)
Construct with primary and optional fallback memory resource.

rapidsmpf::detail::RmmResourceAdaptorImpl::end_scoped_memory_record
ScopedMemoryRecord end_scoped_memory_record()
End the current scoped memory record and return it.

rmm::device_async_resource_ref
cuda::mr::resource_ref< cuda::mr::device_accessible > device_async_resource_ref

CUDA_ALLOCATION_ALIGNMENT
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT

rapidsmpf::detail
Definition: rmm_resource_adaptor_impl.hpp:25

rmm

resource_ref.hpp

rapidsmpf::ScopedMemoryRecord
Memory statistics for a specific scope.
Definition: scoped_memory_record.hpp:20