librmm/25.12/aligned__resource__adaptor_8hpp_source

 /*

  * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.

  * SPDX-License-Identifier: Apache-2.0

  */

 #pragma once


 #include <rmm/aligned.hpp>

 #include <rmm/cuda_stream_view.hpp>

 #include <rmm/detail/error.hpp>

 #include <rmm/detail/export.hpp>

 #include <rmm/mr/device_memory_resource.hpp>

 #include <rmm/mr/per_device_resource.hpp>

 #include <rmm/resource_ref.hpp>


 #include <algorithm>

 #include <cstddef>

 #include <mutex>

 #include <unordered_map>


 namespace RMM_NAMESPACE {

 namespace mr {

 /**
  * @brief Resource adaptor that makes an upstream memory resource return allocations with a
  * specified alignment.
  *
  * Requests are forwarded unchanged to the upstream resource when the effective alignment equals
  * `rmm::CUDA_ALLOCATION_ALIGNMENT` or when the request is smaller than the alignment threshold.
  * Otherwise the adaptor over-allocates from upstream, hands out the first suitably aligned address
  * inside that allocation, and remembers the original upstream pointer so that it can be passed
  * back to upstream on deallocation.
  *
  * Accesses to the internal aligned-pointer -> upstream-pointer map are serialized with a mutex.
  *
  * @tparam Upstream Type of the upstream resource used for allocation/deallocation.
  */
 template <typename Upstream>
 class aligned_resource_adaptor final : public device_memory_resource {
  public:
   /**
    * @brief Construct an aligned resource adaptor using `upstream` to satisfy allocation requests.
    *
    * The effective alignment is the larger of `alignment` and `rmm::CUDA_ALLOCATION_ALIGNMENT`.
    * `RMM_EXPECTS` reports an error if `rmm::is_supported_alignment(alignment)` is false.
    *
    * @param upstream The resource used for allocating/deallocating device memory.
    * @param alignment The requested allocation alignment; must be a power of 2.
    * @param alignment_threshold Only allocations of at least this many bytes are aligned by the
    * adaptor; smaller ones are forwarded to upstream unchanged.
    */
   explicit aligned_resource_adaptor(device_async_resource_ref upstream,
                                     std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT,
                                     std::size_t alignment_threshold = default_alignment_threshold)
     : upstream_{upstream},
       alignment_{std::max(alignment, rmm::CUDA_ALLOCATION_ALIGNMENT)},
       alignment_threshold_{alignment_threshold}
   {
     RMM_EXPECTS(rmm::is_supported_alignment(alignment),
                 "Allocation alignment is not a power of 2.");
   }

   /**
    * @brief Construct an aligned resource adaptor using `upstream` to satisfy allocation requests.
    *
    * The pointer is converted with `to_device_async_resource_ref_checked`, which checks it for
    * `nullptr`. The effective alignment is the larger of `alignment` and
    * `rmm::CUDA_ALLOCATION_ALIGNMENT`. `RMM_EXPECTS` reports an error if
    * `rmm::is_supported_alignment(alignment)` is false.
    *
    * @param upstream The resource used for allocating/deallocating device memory.
    * @param alignment The requested allocation alignment; must be a power of 2.
    * @param alignment_threshold Only allocations of at least this many bytes are aligned by the
    * adaptor; smaller ones are forwarded to upstream unchanged.
    */
   explicit aligned_resource_adaptor(Upstream* upstream,
                                     std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT,
                                     std::size_t alignment_threshold = default_alignment_threshold)
     : upstream_{to_device_async_resource_ref_checked(upstream)},
       alignment_{std::max(alignment, rmm::CUDA_ALLOCATION_ALIGNMENT)},
       alignment_threshold_{alignment_threshold}
   {
     RMM_EXPECTS(rmm::is_supported_alignment(alignment),
                 "Allocation alignment is not a power of 2.");
   }

   // The adaptor is neither default-constructible, copyable, nor movable.
   aligned_resource_adaptor()                                           = delete;
   ~aligned_resource_adaptor() override                                 = default;
   aligned_resource_adaptor(aligned_resource_adaptor const&)            = delete;
   aligned_resource_adaptor(aligned_resource_adaptor&&)                 = delete;
   aligned_resource_adaptor& operator=(aligned_resource_adaptor const&) = delete;
   aligned_resource_adaptor& operator=(aligned_resource_adaptor&&)      = delete;

   /**
    * @brief Get a reference to the upstream resource.
    *
    * @return rmm::device_async_resource_ref to the upstream resource.
    */
   [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept
   {
     return upstream_;
   }

   /**
    * @brief The default alignment threshold.
    *
    * With a threshold of 0 the `bytes < alignment_threshold_` test can never be true for an
    * unsigned size, so no allocation is exempted from alignment because of its size.
    */
   static constexpr std::size_t default_alignment_threshold = 0;

  private:
   using lock_guard = std::lock_guard<std::mutex>;

   /**
    * @brief Allocate memory of at least `bytes` bytes, aligned to the adaptor's alignment when the
    * request is subject to alignment.
    *
    * Exceptions raised by the upstream allocation propagate to the caller. If recording the
    * pointer mapping fails, the upstream allocation is released before the exception is rethrown.
    *
    * @param bytes The size of the allocation in bytes.
    * @param stream Stream on which to perform the allocation.
    * @return Pointer to the (possibly re-aligned) allocation.
    */
   void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
   {
     // Pass-through: nothing to align, or the request is below the alignment threshold.
     if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) {
       return get_upstream_resource().allocate(stream, bytes, 1);
     }

     // Over-allocate so that an aligned address with `bytes` usable bytes fits in the block.
     auto const size = upstream_allocation_size(bytes);
     void* pointer   = get_upstream_resource().allocate(stream, size, 1);

     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
     auto const address         = reinterpret_cast<std::size_t>(pointer);
     auto const aligned_address = rmm::align_up(address, alignment_);
     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr)
     void* aligned_pointer = reinterpret_cast<void*>(aligned_address);

     if (pointer != aligned_pointer) {
       // Only shifted pointers need an entry; unshifted ones are handed back to upstream as-is.
       try {
         lock_guard lock(mtx_);
         pointers_.emplace(aligned_pointer, pointer);
       } catch (...) {
         // Locking or inserting into the map may throw (e.g. std::bad_alloc). Without the
         // mapping the block could never be returned correctly, so release it now instead of
         // leaking it.
         get_upstream_resource().deallocate(stream, pointer, size, 1);
         throw;
       }
     }
     return aligned_pointer;
   }

   /**
    * @brief Deallocate memory pointed to by `ptr`.
    *
    * `bytes` selects the same path that `do_allocate` took (pass-through vs. aligned) and is used
    * to recompute the size of the upstream allocation, so it has to equal the size that was
    * requested at allocation time.
    *
    * @param ptr Pointer previously returned by this adaptor.
    * @param bytes Size of the allocation in bytes, as passed to the allocation call.
    * @param stream Stream on which to perform the deallocation.
    */
   void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
   {
     if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) {
       get_upstream_resource().deallocate(stream, ptr, bytes, 1);
     } else {
       {
         // Translate the aligned pointer back to the pointer upstream handed out. No entry means
         // the upstream pointer was already aligned and was returned unmodified.
         lock_guard lock(mtx_);
         auto const iter = pointers_.find(ptr);
         if (iter != pointers_.end()) {
           ptr = iter->second;
           pointers_.erase(iter);
         }
       }
       // The lock is released before calling into upstream.
       get_upstream_resource().deallocate(stream, ptr, upstream_allocation_size(bytes), 1);
     }
   }

   /**
    * @brief Compare this resource to another.
    *
    * Two adaptors are equal when they are the same object, or when `other` is an
    * `aligned_resource_adaptor<Upstream>` with an equal upstream resource, alignment and
    * alignment threshold.
    *
    * @param other The other resource to compare to.
    * @return true if the two resources are equivalent, false otherwise.
    */
   [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
   {
     if (this == &other) { return true; }
     auto cast = dynamic_cast<aligned_resource_adaptor<Upstream> const*>(&other);
     if (cast == nullptr) { return false; }
     return get_upstream_resource() == cast->get_upstream_resource() &&
            alignment_ == cast->alignment_ && alignment_threshold_ == cast->alignment_threshold_;
   }

   /**
    * @brief Calculate the size to request from upstream so that an aligned sub-range of `bytes`
    * bytes is guaranteed to fit.
    *
    * @param bytes The requested allocation size in bytes.
    * @return `bytes` rounded up to the alignment, plus the worst-case padding in front of it.
    */
   [[nodiscard]] std::size_t upstream_allocation_size(std::size_t bytes) const noexcept
   {
     auto const aligned_size = rmm::align_up(bytes, alignment_);
     // aligned_size: bytes of properly aligned space needed
     // (alignment_ - rmm::CUDA_ALLOCATION_ALIGNMENT): maximum "waste" due to pointer misalignment
     // NOTE(review): this bound presumes upstream pointers are at least
     // rmm::CUDA_ALLOCATION_ALIGNMENT-aligned -- confirm against the upstream contract.
     return aligned_size + (alignment_ - rmm::CUDA_ALLOCATION_ALIGNMENT);
   }

   device_async_resource_ref upstream_;          ///< Upstream resource used for (de)allocation
   std::unordered_map<void*, void*> pointers_;   ///< Aligned pointer -> original upstream pointer
   std::size_t alignment_;                       ///< Effective allocation alignment
   std::size_t alignment_threshold_;             ///< Minimum request size that gets aligned
   mutable std::mutex mtx_;                      ///< Guards access to pointers_
 };

   // end of group

 }  // namespace mr

 }  // namespace RMM_NAMESPACE

aligned.hpp

rmm::cuda_stream_view
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:28

rmm::mr::aligned_resource_adaptor
Resource that adapts an Upstream memory resource to allocate memory with a specified alignment.
Definition: aligned_resource_adaptor.hpp:51

rmm::mr::aligned_resource_adaptor::aligned_resource_adaptor
aligned_resource_adaptor(device_async_resource_ref upstream, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold=default_alignment_threshold)
Construct an aligned resource adaptor using upstream to satisfy allocation requests.
Definition: aligned_resource_adaptor.hpp:64

rmm::mr::aligned_resource_adaptor::aligned_resource_adaptor
aligned_resource_adaptor(Upstream *upstream, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold=default_alignment_threshold)
Construct an aligned resource adaptor using upstream to satisfy allocation requests.
Definition: aligned_resource_adaptor.hpp:87

rmm::mr::aligned_resource_adaptor::get_upstream_resource
rmm::device_async_resource_ref get_upstream_resource() const noexcept
Returns an rmm::device_async_resource_ref to the upstream resource.
Definition: aligned_resource_adaptor.hpp:108

rmm::mr::device_memory_resource
Base class for all librmm device memory allocation.
Definition: device_memory_resource.hpp:83

cuda_stream_view.hpp

device_memory_resource.hpp

rmm::to_device_async_resource_ref_checked
device_async_resource_ref to_device_async_resource_ref_checked(Resource *res)
Convert pointer to memory resource into device_async_resource_ref, checking for nullptr
Definition: resource_ref.hpp:72

rmm::device_async_resource_ref
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:32

rmm::CUDA_ALLOCATION_ALIGNMENT
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
Default alignment used for CUDA memory allocation.
Definition: aligned.hpp:33

rmm::is_supported_alignment
bool is_supported_alignment(std::size_t alignment) noexcept
Returns whether or not alignment is a valid memory alignment.

rmm::align_up
std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
Align up to nearest multiple of specified power of 2.

rmm
RAPIDS Memory Manager - The top-level namespace for all RMM functionality.

per_device_resource.hpp
Management of per-device device_memory_resources.

resource_ref.hpp