RMM: pool_memory_resource.hpp Source File

 /*

  * Copyright (c) 2020-2023, NVIDIA CORPORATION.

  *

  * Licensed under the Apache License, Version 2.0 (the "License");

  * you may not use this file except in compliance with the License.

  * You may obtain a copy of the License at

  *

  *     http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing, software

  * distributed under the License is distributed on an "AS IS" BASIS,

  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  * See the License for the specific language governing permissions and

  * limitations under the License.

  */

 #pragma once


 #include <rmm/cuda_stream_view.hpp>

 #include <rmm/detail/aligned.hpp>

 #include <rmm/detail/cuda_util.hpp>

 #include <rmm/detail/error.hpp>

 #include <rmm/detail/logging_assert.hpp>

 #include <rmm/logger.hpp>

 #include <rmm/mr/device/detail/coalescing_free_list.hpp>

 #include <rmm/mr/device/detail/stream_ordered_memory_resource.hpp>

 #include <rmm/mr/device/device_memory_resource.hpp>


 #include <rmm/detail/thrust_namespace.h>

 #include <thrust/iterator/counting_iterator.h>

 #include <thrust/iterator/transform_iterator.h>

 #include <thrust/optional.h>


 #include <fmt/core.h>


 #include <cuda_runtime_api.h>


 #include <algorithm>

 #include <cstddef>

 #include <iostream>

 #include <map>

 #include <mutex>

 #include <numeric>

 #include <set>

 #include <thread>

 #include <unordered_map>

 #include <vector>


 namespace rmm::mr {

 namespace detail {

 /**
  * @brief A helper class to remove the device_accessible property.
  *
  * The primary template is an empty struct, so inheriting from it adds no declarations. It is the
  * version used whenever the partial specialization's `enable_if_t` condition does not hold.
  *
  * @tparam PoolResource The resource type that inherits from this helper.
  * @tparam Upstream The upstream resource type whose properties are inspected.
  * @tparam Property The property to conditionally remove.
  */
 template <class PoolResource, class Upstream, class Property, class = void>

 struct maybe_remove_property {};


 /**
  * @brief Specialization selected when `Upstream` does not have `Property`
  * (i.e. `!cuda::has_property<Upstream, Property>`).
  *
  * It declares a deleted friend `get_property(const PoolResource&, Property)`, so a
  * `get_property` call with these argument types resolves to a deleted function.
  */
 template <class PoolResource, class Upstream, class Property>

 struct maybe_remove_property<PoolResource,

                              Upstream,

                              Property,

                              cuda::std::enable_if_t<!cuda::has_property<Upstream, Property>>> {

 #ifdef __GNUC__  // GCC warns about compatibility issues with pre ISO C++ code

 #pragma GCC diagnostic push

 #pragma GCC diagnostic ignored "-Wnon-template-friend"

 #endif  // __GNUC__

   // Deleted on purpose: the friend declaration exists only to make this overload unusable.
   friend void get_property(const PoolResource&, Property) = delete;

 #ifdef __GNUC__

 // Restore the diagnostic state saved by the `push` above.
 #pragma GCC diagnostic pop

 #endif  // __GNUC__

 };

 }  // namespace detail


 /**
  * @brief A coalescing best-fit suballocator which uses a pool of memory allocated from an
  * upstream memory resource.
  *
  * The class derives from three bases:
  *  - `detail::maybe_remove_property<..., cuda::mr::device_accessible>`, which deletes
  *    `get_property` for `device_accessible` when `Upstream` does not have that property;
  *  - `detail::stream_ordered_memory_resource<pool_memory_resource<Upstream>,
  *    detail::coalescing_free_list>`, parameterized on this class and the free list type;
  *  - `cuda::forward_property<pool_memory_resource<Upstream>, Upstream>`.
  *
  * @tparam Upstream Type of the upstream resource from which the pool's memory is allocated.
  */
 template <typename Upstream>

 class pool_memory_resource final

   : public detail::

       maybe_remove_property<pool_memory_resource<Upstream>, Upstream, cuda::mr::device_accessible>,

     public detail::stream_ordered_memory_resource<pool_memory_resource<Upstream>,

                                                   detail::coalescing_free_list>,

     public cuda::forward_property<pool_memory_resource<Upstream>, Upstream> {

  public:

   // Grants the stream_ordered_memory_resource base access to this class's protected and private
   // members. NOTE(review): presumably so the base can call the protected pool hooks declared
   // below -- confirm against stream_ordered_memory_resource.hpp.
   friend class detail::stream_ordered_memory_resource<pool_memory_resource<Upstream>,

                                                       detail::coalescing_free_list>;


   /**
    * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using
    * `upstream_mr`.
    *
    * Fails (via `RMM_EXPECTS`) if `upstream_mr` is null, or if `initial_pool_size` or
    * `maximum_pool_size` is set to a value that is not aligned to
    * `rmm::detail::CUDA_ALLOCATION_ALIGNMENT`. An unset size is treated as 0 for this check.
    *
    * @param upstream_mr The resource from which the pool's memory is allocated.
    * @param initial_pool_size Requested initial pool size in bytes; when unset, `initialize_pool`
    * chooses a size.
    * @param maximum_pool_size Upper bound, in bytes, on the pool size; unset means no bound.
    */
   explicit pool_memory_resource(Upstream* upstream_mr,
                                 thrust::optional<std::size_t> initial_pool_size = thrust::nullopt,
                                 thrust::optional<std::size_t> maximum_pool_size = thrust::nullopt)
     : upstream_mr_{[upstream_mr]() {
         // Validate inside the member initializer so the null check runs before the member is set.
         RMM_EXPECTS(nullptr != upstream_mr, "Unexpected null upstream pointer.");
         return upstream_mr;
       }()}
   {
     auto const alignment     = rmm::detail::CUDA_ALLOCATION_ALIGNMENT;
     auto const initial_bytes = initial_pool_size.value_or(0);
     auto const maximum_bytes = maximum_pool_size.value_or(0);

     RMM_EXPECTS(rmm::detail::is_aligned(initial_bytes, alignment),
                 "Error, Initial pool size required to be a multiple of 256 bytes");
     RMM_EXPECTS(rmm::detail::is_aligned(maximum_bytes, alignment),
                 "Error, Maximum pool size required to be a multiple of 256 bytes");

     initialize_pool(initial_pool_size, maximum_pool_size);
   }


   /**
    * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using
    * `upstream_mr`.
    *
    * This overload takes the upstream resource by reference and delegates to the pointer-taking
    * constructor with `cuda::std::addressof(upstream_mr)`. It only participates in overload
    * resolution when `cuda::mr::async_resource<Upstream2>` holds.
    *
    * @tparam Upstream2 Defaults to `Upstream`; exists so the `enable_if_t` constraint is dependent.
    * @param upstream_mr The resource from which the pool's memory is allocated.
    * @param initial_pool_size Requested initial pool size in bytes (optional).
    * @param maximum_pool_size Upper bound, in bytes, on the pool size (optional).
    */
   template <typename Upstream2                                               = Upstream,

             cuda::std::enable_if_t<cuda::mr::async_resource<Upstream2>, int> = 0>

   explicit pool_memory_resource(Upstream2& upstream_mr,

                                 thrust::optional<std::size_t> initial_pool_size = thrust::nullopt,

                                 thrust::optional<std::size_t> maximum_pool_size = thrust::nullopt)

     : pool_memory_resource(cuda::std::addressof(upstream_mr), initial_pool_size, maximum_pool_size)

   {

   }


   /**
    * @brief Destroy the `pool_memory_resource` and deallocate all memory it allocated using the
    * upstream resource (by calling `release()`).
    */
   ~pool_memory_resource() override { release(); }


   // The resource is neither default-constructible, copyable, nor movable.
   pool_memory_resource()                                       = delete;

   pool_memory_resource(pool_memory_resource const&)            = delete;

   pool_memory_resource(pool_memory_resource&&)                 = delete;

   pool_memory_resource& operator=(pool_memory_resource const&) = delete;

   pool_memory_resource& operator=(pool_memory_resource&&)      = delete;


   /**
    * @brief Queries whether the resource supports use of non-null CUDA streams for
    * allocation/deallocation.
    *
    * @return Always `true`.
    */
   [[nodiscard]] bool supports_streams() const noexcept override { return true; }


   /**
    * @brief Query whether the resource supports the get_mem_info API.
    *
    * @return Always `false`.
    */
   [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }


   /**
    * @brief Get the upstream memory resource object.
    *
    * @return Const reference to the upstream resource (dereferences the stored pointer).
    */
   [[nodiscard]] const Upstream& upstream_resource() const noexcept { return *upstream_mr_; }


   /**
    * @brief Get the upstream memory resource object.
    *
    * @return The stored pointer to the upstream resource.
    */
   Upstream* get_upstream() const noexcept { return upstream_mr_; }


   /**
    * @brief Get the size of the current pool.
    *
    * @return The value of `current_pool_size_`, in bytes.
    */
   [[nodiscard]] std::size_t pool_size() const noexcept { return current_pool_size_; }


  protected:

   /// The free list implementation.
   using free_list  = detail::coalescing_free_list;

   /// The type of block returned by the free list.
   using block_type = free_list::block_type;

   /// Brings the base class's `split_block` type into scope; it is the return type of
   /// `allocate_from_block`.
   using typename detail::stream_ordered_memory_resource<pool_memory_resource<Upstream>,

                                                         detail::coalescing_free_list>::split_block;

   /// Type of lock used to synchronize access.
   using lock_guard = std::lock_guard<std::mutex>;


   /**
    * @brief Get the maximum size of allocations supported by this memory resource.
    *
    * @return The largest value representable by `std::size_t`, i.e. no limit is imposed here.
    */
   [[nodiscard]] std::size_t get_maximum_allocation_size() const
   {
     constexpr auto no_limit = std::numeric_limits<std::size_t>::max();
     return no_limit;
   }


   /**
    * @brief Try to expand the pool by allocating a block of at least `min_size` bytes from
    * upstream.
    *
    * The first request is for `try_size` bytes. Each time the upstream request fails, the request
    * is halved, but never drops below `min_size`; `min_size` itself is attempted at most once.
    * On success the obtained block's size is added to `current_pool_size_`.
    *
    * If no attempt succeeds (or `try_size < min_size`), an error is logged and `RMM_FAIL` is
    * invoked with `rmm::out_of_memory`.
    *
    * @param try_size The first size, in bytes, to request from upstream.
    * @param min_size The smallest acceptable size, in bytes.
    * @param stream The stream forwarded to the upstream allocation.
    * @return The block obtained from upstream.
    */
   block_type try_to_expand(std::size_t try_size, std::size_t min_size, cuda_stream_view stream)
   {
     for (auto request = try_size; request >= min_size;
          request      = std::max(min_size, request / 2)) {
       auto const upstream_block = block_from_upstream(request, stream);
       if (upstream_block.has_value()) {
         current_pool_size_ += upstream_block.value().size();
         return upstream_block.value();
       }
       // The minimum size has just been tried and failed; retrying it would loop forever.
       if (request == min_size) { break; }
     }

     RMM_LOG_ERROR("[A][Stream {}][Upstream {}B][FAILURE maximum pool size exceeded]",
                   fmt::ptr(stream.value()),
                   min_size);
     RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory);
   }


   /**
    * @brief Allocate initial memory for the pool.
    *
    * When `initial_size` is unset, the first request is `min(free, total / 2)` rounded up to
    * `rmm::detail::CUDA_ALLOCATION_ALIGNMENT`, where `free`/`total` come from the upstream
    * resource's `get_mem_info` if it supports it, and from
    * `rmm::detail::available_device_memory()` otherwise.
    *
    * Fails (via `RMM_EXPECTS`) if the first request exceeds `maximum_size`. A first request of
    * zero bytes allocates nothing.
    *
    * @param initial_size The optional initial size for the pool, in bytes.
    * @param maximum_size The optional maximum size for the pool, in bytes.
    */
   // NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
   void initialize_pool(thrust::optional<std::size_t> initial_size,
                        thrust::optional<std::size_t> maximum_size)
   {
     std::size_t first_request{};
     if (initial_size.has_value()) {
       first_request = initial_size.value();
     } else {
       auto const [free, total] = (get_upstream()->supports_get_mem_info())
                                    ? get_upstream()->get_mem_info(cuda_stream_legacy)
                                    : rmm::detail::available_device_memory();
       first_request            = rmm::detail::align_up(std::min(free, total / 2),
                                             rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
     }

     current_pool_size_ = 0;  // updated by try_to_expand once an upstream block is obtained
     maximum_pool_size_ = maximum_size;

     auto const pool_limit = maximum_pool_size_.value_or(std::numeric_limits<std::size_t>::max());
     RMM_EXPECTS(first_request <= pool_limit, "Initial pool size exceeds the maximum pool size!");

     if (first_request == 0) { return; }

     // try_size == min_size: the initial allocation is attempted exactly once at the full size.
     auto const block = try_to_expand(first_request, first_request, cuda_stream_legacy);
     this->insert_block(block, cuda_stream_legacy);
   }


   /**
    * @brief Allocate space from upstream to supply the suballocation pool and return a
    * sufficiently sized block.
    *
    * Growth strategy: `size_to_grow(size)` picks the first size to try. With a maximum pool size
    * set, that is halfway to the limit (or `size`, if larger); without one, it is the current
    * pool size (or `size`, if larger), i.e. the pool doubles. If upstream cannot satisfy the
    * request, `try_to_expand` halves it repeatedly until it succeeds or falls to `size`.
    *
    * @param size The minimum size, in bytes, of the block to obtain.
    * @param blocks The free list (not used by this implementation).
    * @param stream The stream forwarded to the upstream allocation.
    * @return A block of at least `size` bytes.
    */
   block_type expand_pool(std::size_t size, free_list& blocks, cuda_stream_view stream)
   {
     auto const first_attempt = size_to_grow(size);
     return try_to_expand(first_attempt, size, stream);
   }


   /**
    * @brief Given a minimum size, computes an appropriate size to grow the pool.
    *
    * Without a maximum pool size, the result is `max(size, pool_size())`, so the pool at least
    * doubles. With a maximum pool size, the result is `max(aligned size, remaining / 2)` when
    * the aligned request fits in the remaining capacity, and 0 when it does not.
    *
    * @param size The minimum number of bytes the pool must grow by.
    * @return The number of bytes to request from upstream first, or 0 if `size` does not fit
    * under the maximum pool size.
    */
   [[nodiscard]] std::size_t size_to_grow(std::size_t size) const
   {
     if (not maximum_pool_size_.has_value()) { return std::max(size, pool_size()); }

     using rmm::detail::align_up;

     auto const limit   = maximum_pool_size_.value();
     auto const current = pool_size();

     // Saturate at zero: both operands are unsigned, so `limit - current` would wrap around to a
     // huge value if the pool were ever larger than the limit, and every request would then
     // appear to fit.
     auto const unaligned_remaining = (limit > current) ? (limit - current) : std::size_t{0};

     auto const remaining = align_up(unaligned_remaining, rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
     auto const aligned_size = align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
     return (aligned_size <= remaining) ? std::max(aligned_size, remaining / 2) : 0;
   }


   /**
    * @brief Allocate a block from upstream to expand the suballocation pool.
    *
    * A request for zero bytes returns an empty optional without calling upstream. Any
    * `std::exception` thrown while allocating or recording the block is swallowed and reported
    * as an empty optional, so the caller can retry with a smaller size.
    *
    * @param size The size in bytes to allocate from the upstream resource.
    * @param stream The stream on which the upstream allocation is made.
    * @return The new block (recorded in `upstream_blocks_` as a head block), or an empty optional
    * on failure.
    */
   thrust::optional<block_type> block_from_upstream(std::size_t size, cuda_stream_view stream)
   {
     RMM_LOG_DEBUG("[A][Stream {}][Upstream {}B]", fmt::ptr(stream.value()), size);

     if (size == 0) { return thrust::nullopt; }

     try {
       auto* const memory  = static_cast<char*>(get_upstream()->allocate_async(size, stream));
       auto const inserted = upstream_blocks_.emplace(memory, size, true);
       return thrust::optional<block_type>{*inserted.first};
     } catch (std::exception const&) {
       return thrust::nullopt;
     }
   }


   /**
    * @brief Splits `block` if necessary to return a pointer to memory of `size` bytes.
    *
    * The allocated part starts at `block.pointer()`, spans `size` bytes and keeps `block`'s head
    * flag. If `block` is larger than `size`, the remainder starts `size` bytes in and is never a
    * head block; otherwise the remainder is a default-constructed block.
    *
    * @param block The block to allocate from.
    * @param size The size in bytes of the requested allocation.
    * @return The allocated block paired with the remainder.
    */
   split_block allocate_from_block(block_type const& block, std::size_t size)
   {
     block_type const allocated{block.pointer(), size, block.is_head()};
 #ifdef RMM_POOL_TRACK_ALLOCATIONS
     allocated_blocks_.insert(allocated);
 #endif

     // Exact fit (or smaller): nothing is left over.
     if (block.size() <= size) { return {allocated, block_type{}}; }

     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
     block_type const remainder{block.pointer() + size, block.size() - size, false};
     return {allocated, remainder};
   }


   /**
    * @brief Finds, frees and returns the block associated with pointer `ptr`.
    *
    * With `RMM_POOL_TRACK_ALLOCATIONS` defined, the block is looked up in (and erased from)
    * `allocated_blocks_`; a null `ptr` yields a default-constructed block. Otherwise the block
    * is rebuilt from `ptr` and `size`, and is flagged as a head block when `ptr` is found in
    * `upstream_blocks_`.
    *
    * @param ptr The pointer to the memory to free.
    * @param size The size of the memory to free, in bytes.
    * @return The (now freed) block associated with `ptr`.
    */
   block_type free_block(void* ptr, std::size_t size) noexcept
   {
     auto* const address = static_cast<char*>(ptr);
 #ifdef RMM_POOL_TRACK_ALLOCATIONS
     if (nullptr == ptr) { return block_type{}; }

     auto const tracked = allocated_blocks_.find(address);
     RMM_LOGGING_ASSERT(tracked != allocated_blocks_.end());

     auto freed = *tracked;
     // NOTE(review): `allocation_alignment` is not declared in the visible part of this file --
     // confirm where it comes from when building with RMM_POOL_TRACK_ALLOCATIONS.
     RMM_LOGGING_ASSERT(freed.size() == rmm::detail::align_up(size, allocation_alignment));
     allocated_blocks_.erase(tracked);

     return freed;
 #else
     bool const is_upstream_head = (upstream_blocks_.find(address) != upstream_blocks_.end());
     return block_type{address, size, is_upstream_head};
 #endif
   }


   void release()

   {

     lock_guard lock(this->get_mutex());


     for (auto block : upstream_blocks_) {

       get_upstream()->deallocate(block.pointer(), block.size());

     }

     upstream_blocks_.clear();

 #ifdef RMM_POOL_TRACK_ALLOCATIONS

     allocated_blocks_.clear();

 #endif


     current_pool_size_ = 0;

   }


 #ifdef RMM_DEBUG_PRINT
   /**
    * @brief Print debugging information about the pool to `std::cout`.
    *
    * Holds the resource's mutex while printing the upstream memory info, every upstream block
    * with the sum of their sizes, the tracked allocated blocks (only with
    * `RMM_POOL_TRACK_ALLOCATIONS`), and finally the free blocks via `print_free_blocks()`.
    */
   void print()
   {
     lock_guard lock(this->get_mutex());

     auto const [free, total] = upstream_mr_->get_mem_info(rmm::cuda_stream_default);
     std::cout << "GPU free memory: " << free << " total: " << total << "\n";

     std::cout << "upstream_blocks: " << upstream_blocks_.size() << "\n";
     std::size_t upstream_bytes{0};

     // Blocks are iterated by value on purpose: the signature of block_type::print() is not
     // visible here, so no constness is assumed.
     for (auto upstream_block : upstream_blocks_) {
       upstream_block.print();
       upstream_bytes += upstream_block.size();
     }
     std::cout << "total upstream: " << upstream_bytes << " B\n";

 #ifdef RMM_POOL_TRACK_ALLOCATIONS
     std::cout << "allocated_blocks: " << allocated_blocks_.size() << "\n";
     for (auto allocated_block : allocated_blocks_) {
       allocated_block.print();
     }
 #endif

     this->print_free_blocks();
   }
 #endif


   /**
    * @brief Get the largest available block size and total free size in the specified free list.
    *
    * @param blocks The free list from which to return the summary.
    * @return A pair of `{largest block size, total size}`, both in bytes; `{0, 0}` for an empty
    * list.
    */
   std::pair<std::size_t, std::size_t> free_list_summary(free_list const& blocks)
   {
     std::size_t largest_block{0};
     std::size_t total_bytes{0};
     for (auto iter = blocks.cbegin(); iter != blocks.cend(); ++iter) {
       auto const& block = *iter;
       total_bytes += block.size();
       largest_block = std::max(largest_block, block.size());
     }
     return {largest_block, total_bytes};
   }


   /**
    * @brief Get free and available memory for memory resource.
    *
    * Not implemented for the pool: the result is always `{0, 0}`, matching
    * `supports_get_mem_info()` returning `false`.
    *
    * @param stream The stream to query (unused).
    * @return The pair `{0, 0}`.
    */
   [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
     cuda_stream_view stream) const override
   {
     // TODO implement this
     constexpr std::size_t not_reported{0};
     return {not_reported, not_reported};
   }


  private:

   Upstream* upstream_mr_;  // The "heap" to allocate the pool from

   // Sum of the sizes of the blocks obtained from upstream; reset to 0 by release().
   std::size_t current_pool_size_{};

   // Optional upper bound on the pool size, in bytes; empty means no bound.
   thrust::optional<std::size_t> maximum_pool_size_{};


 #ifdef RMM_POOL_TRACK_ALLOCATIONS

   // Blocks currently handed out by allocate_from_block (only tracked in this build mode).
   std::set<block_type, rmm::mr::detail::compare_blocks<block_type>> allocated_blocks_;

 #endif


   // blocks allocated from upstream

   std::set<block_type, rmm::mr::detail::compare_blocks<block_type>> upstream_blocks_;

 };  // class pool_memory_resource

   // end of group

 }  // namespace rmm::mr

rmm::cuda_stream_view
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:41

rmm::cuda_stream_view::value
constexpr cudaStream_t value() const noexcept
Get the wrapped stream.
Definition: cuda_stream_view.hpp:75

rmm::mr::pool_memory_resource
A coalescing best-fit suballocator which uses a pool of memory allocated from an upstream memory_resource.
Definition: pool_memory_resource.hpp:108

rmm::mr::pool_memory_resource::initialize_pool
void initialize_pool(thrust::optional< std::size_t > initial_size, thrust::optional< std::size_t > maximum_size)
Allocate initial memory for the pool.
Definition: pool_memory_resource.hpp:289

rmm::mr::pool_memory_resource::free_block
block_type free_block(void *ptr, std::size_t size) noexcept
Finds, frees and returns the block associated with pointer ptr.
Definition: pool_memory_resource.hpp:411

rmm::mr::pool_memory_resource::allocate_from_block
split_block allocate_from_block(block_type const &block, std::size_t size)
Splits block if necessary to return a pointer to memory of size bytes.
Definition: pool_memory_resource.hpp:389

rmm::mr::pool_memory_resource::size_to_grow
std::size_t size_to_grow(std::size_t size) const
Given a minimum size, computes an appropriate size to grow the pool.
Definition: pool_memory_resource.hpp:345

rmm::mr::pool_memory_resource::supports_streams
bool supports_streams() const noexcept override
Queries whether the resource supports use of non-null CUDA streams for allocation/deallocation.
Definition: pool_memory_resource.hpp:190

rmm::mr::pool_memory_resource::upstream_resource
const Upstream & upstream_resource() const noexcept
Get the upstream memory_resource object.
Definition: pool_memory_resource.hpp:204

rmm::mr::pool_memory_resource::block_from_upstream
thrust::optional< block_type > block_from_upstream(std::size_t size, cuda_stream_view stream)
Allocate a block from upstream to expand the suballocation pool.
Definition: pool_memory_resource.hpp:364

rmm::mr::pool_memory_resource::block_type
free_list::block_type block_type
The type of block returned by the free list.
Definition: pool_memory_resource.hpp:224

rmm::mr::pool_memory_resource::free_list_summary
std::pair< std::size_t, std::size_t > free_list_summary(free_list const &blocks)
Get the largest available block size and total free size in the specified free list.
Definition: pool_memory_resource.hpp:489

rmm::mr::pool_memory_resource::do_get_mem_info
std::pair< std::size_t, std::size_t > do_get_mem_info(cuda_stream_view stream) const override
Get free and available memory for memory resource.
Definition: pool_memory_resource.hpp:508

rmm::mr::pool_memory_resource::get_maximum_allocation_size
std::size_t get_maximum_allocation_size() const
Get the maximum size of allocations supported by this memory resource.
Definition: pool_memory_resource.hpp:237

rmm::mr::pool_memory_resource::expand_pool
block_type expand_pool(std::size_t size, free_list &blocks, cuda_stream_view stream)
Allocate space from upstream to supply the suballocation pool and return a sufficiently sized block.
Definition: pool_memory_resource.hpp:324

rmm::mr::pool_memory_resource::supports_get_mem_info
bool supports_get_mem_info() const noexcept override
Query whether the resource supports the get_mem_info API.
Definition: pool_memory_resource.hpp:197

rmm::mr::pool_memory_resource::get_upstream
Upstream * get_upstream() const noexcept
Get the upstream memory_resource object.
Definition: pool_memory_resource.hpp:211

rmm::mr::pool_memory_resource::release
void release()
Free all memory allocated from the upstream memory_resource.
Definition: pool_memory_resource.hpp:433

rmm::mr::pool_memory_resource::try_to_expand
block_type try_to_expand(std::size_t try_size, std::size_t min_size, cuda_stream_view stream)
Try to expand the pool by allocating a block of at least min_size bytes from upstream.
Definition: pool_memory_resource.hpp:257

rmm::mr::pool_memory_resource::lock_guard
std::lock_guard< std::mutex > lock_guard
Type of lock used to synchronize access.
Definition: pool_memory_resource.hpp:227

rmm::mr::pool_memory_resource::pool_size
std::size_t pool_size() const noexcept
Computes the size of the current pool.
Definition: pool_memory_resource.hpp:220

rmm::mr::pool_memory_resource::pool_memory_resource
pool_memory_resource(Upstream2 &upstream_mr, thrust::optional< std::size_t > initial_pool_size=thrust::nullopt, thrust::optional< std::size_t > maximum_pool_size=thrust::nullopt)
Construct a pool_memory_resource and allocate the initial device memory pool using upstream_mr.
Definition: pool_memory_resource.hpp:165

rmm::mr::pool_memory_resource::~pool_memory_resource
~pool_memory_resource() override
Destroy the pool_memory_resource and deallocate all memory it allocated using the upstream resource.
Definition: pool_memory_resource.hpp:176

rmm::mr::pool_memory_resource::free_list
detail::coalescing_free_list free_list
The free list implementation.
Definition: pool_memory_resource.hpp:223

rmm::out_of_memory
Exception thrown when RMM runs out of memory.
Definition: error.hpp:89

cuda_stream_view.hpp

device_memory_resource.hpp

rmm::mr::detail::maybe_remove_property
A helper class to remove the device_accessible property.
Definition: pool_memory_resource.hpp:67