pool_memory_resource.hpp
1 /*
2  * Copyright (c) 2020-2021, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <rmm/cuda_stream_view.hpp>
19 #include <rmm/detail/aligned.hpp>
20 #include <rmm/detail/cuda_util.hpp>
21 #include <rmm/detail/error.hpp>
22 #include <rmm/logger.hpp>
23 #include <rmm/mr/device/detail/coalescing_free_list.hpp>
24 #include <rmm/mr/device/detail/stream_ordered_memory_resource.hpp>
25 #include <rmm/mr/device/device_memory_resource.hpp>
26 
27 #include <thrust/iterator/counting_iterator.h>
28 #include <thrust/iterator/transform_iterator.h>
29 #include <thrust/optional.h>
30 
31 #include <cuda_runtime_api.h>
32 
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <limits>
#include <map>
#include <mutex>
#include <numeric>
#include <set>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>
43 
44 namespace rmm::mr {
45 
56 template <typename Upstream>
58  : public detail::stream_ordered_memory_resource<pool_memory_resource<Upstream>,
59  detail::coalescing_free_list> {
60  public:
62  detail::coalescing_free_list>;
63 
  /**
   * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using
   * `upstream_mr`.
   *
   * @throws rmm::logic_error if `upstream_mr == nullptr`
   * @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of 256 bytes
   * @throws rmm::logic_error if `maximum_pool_size` is not aligned to a multiple of 256 bytes
   *
   * @param upstream_mr The memory_resource from which to allocate blocks for the pool.
   * @param initial_pool_size Minimum size, in bytes, of the initial pool. When unset, the
   * default chosen by `initialize_pool` is used (half of available device memory).
   * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. When unset, the
   * pool size is limited only by what upstream can supply.
   */
  explicit pool_memory_resource(Upstream* upstream_mr,
                                thrust::optional<std::size_t> initial_pool_size = thrust::nullopt,
                                thrust::optional<std::size_t> maximum_pool_size = thrust::nullopt)
    : upstream_mr_{[upstream_mr]() {
        // Validate the upstream pointer *before* the member is initialized, so a null upstream
        // fails fast instead of being stored.
        RMM_EXPECTS(nullptr != upstream_mr, "Unexpected null upstream pointer.");
        return upstream_mr;
      }()}
  {
    RMM_EXPECTS(rmm::detail::is_aligned(initial_pool_size.value_or(0),
                                        rmm::detail::CUDA_ALLOCATION_ALIGNMENT),
                "Error, Initial pool size required to be a multiple of 256 bytes");
    RMM_EXPECTS(rmm::detail::is_aligned(maximum_pool_size.value_or(0),
                                        rmm::detail::CUDA_ALLOCATION_ALIGNMENT),
                "Error, Maximum pool size required to be a multiple of 256 bytes");

    initialize_pool(initial_pool_size, maximum_pool_size);
  }
97 
  /**
   * @brief Destroy the `pool_memory_resource` and deallocate all memory it allocated using the
   * upstream resource.
   */
  ~pool_memory_resource() override { release(); }

  pool_memory_resource()                                       = delete;
  pool_memory_resource& operator=(pool_memory_resource const&) = delete;
  pool_memory_resource& operator=(pool_memory_resource&&)      = delete;

  /**
   * @brief Queries whether the resource supports use of non-null CUDA streams for
   * allocation/deallocation.
   *
   * @returns bool true.
   */
  [[nodiscard]] bool supports_streams() const noexcept override { return true; }

  /**
   * @brief Query whether the resource supports the get_mem_info API.
   *
   * @return false
   */
  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }

  /**
   * @brief Get the upstream memory_resource object.
   *
   * @return Upstream* the upstream memory resource.
   */
  Upstream* get_upstream() const noexcept { return upstream_mr_; }

  /**
   * @brief Computes the size of the current pool.
   *
   * Includes allocated as well as free memory.
   *
   * @return std::size_t The total size of the currently allocated pool.
   */
  [[nodiscard]] std::size_t pool_size() const noexcept { return current_pool_size_; }
 protected:
  using free_list  = detail::coalescing_free_list;  ///< The free list implementation
  using block_type = free_list::block_type;         ///< The type of block returned by the free list
  using lock_guard = std::lock_guard<std::mutex>;   ///< Type of lock used to synchronize access

  /**
   * @brief Get the maximum size of allocations supported by this memory resource.
   *
   * Note this does not depend on the memory size of the device; it is just the maximum value of
   * `std::size_t`.
   *
   * @return std::size_t The maximum size of a single allocation supported by this resource.
   */
  [[nodiscard]] std::size_t get_maximum_allocation_size() const
  {
    return std::numeric_limits<std::size_t>::max();
  }
160 
176  block_type try_to_expand(std::size_t try_size, std::size_t min_size, cuda_stream_view stream)
177  {
178  while (try_size >= min_size) {
179  auto block = block_from_upstream(try_size, stream);
180  if (block.has_value()) {
181  current_pool_size_ += block.value().size();
182  return block.value();
183  }
184  if (try_size == min_size) {
185  break; // only try `size` once
186  }
187  try_size = std::max(min_size, try_size / 2);
188  }
189  RMM_LOG_ERROR("[A][Stream {}][Upstream {}B][FAILURE maximum pool size exceeded]",
190  fmt::ptr(stream.value()),
191  min_size);
192  RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory);
193  }
194 
207  // NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
208  void initialize_pool(thrust::optional<std::size_t> initial_size,
209  thrust::optional<std::size_t> maximum_size)
210  {
211  auto const try_size = [&]() {
212  if (not initial_size.has_value()) {
213  auto const [free, total] = (get_upstream()->supports_get_mem_info())
214  ? get_upstream()->get_mem_info(cuda_stream_legacy)
215  : rmm::detail::available_device_memory();
216  return rmm::detail::align_up(std::min(free, total / 2),
217  rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
218  }
219  return initial_size.value();
220  }();
221 
222  current_pool_size_ = 0; // try_to_expand will set this if it succeeds
223  maximum_pool_size_ = maximum_size;
224 
225  RMM_EXPECTS(try_size <= maximum_pool_size_.value_or(std::numeric_limits<std::size_t>::max()),
226  "Initial pool size exceeds the maximum pool size!");
227 
228  if (try_size > 0) {
229  auto const block = try_to_expand(try_size, try_size, cuda_stream_legacy);
230  this->insert_block(block, cuda_stream_legacy);
231  }
232  }
233 
243  block_type expand_pool(std::size_t size, free_list& blocks, cuda_stream_view stream)
244  {
245  // Strategy: If maximum_pool_size_ is set, then grow geometrically, e.g. by halfway to the
246  // limit each time. If it is not set, grow exponentially, e.g. by doubling the pool size each
247  // time. Upon failure, attempt to back off exponentially, e.g. by half the attempted size,
248  // until either success or the attempt is less than the requested size.
249  return try_to_expand(size_to_grow(size), size, stream);
250  }
251 
264  [[nodiscard]] std::size_t size_to_grow(std::size_t size) const
265  {
266  if (maximum_pool_size_.has_value()) {
267  auto const unaligned_remaining = maximum_pool_size_.value() - pool_size();
268  using rmm::detail::align_up;
269  auto const remaining = align_up(unaligned_remaining, rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
270  auto const aligned_size = align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
271  return (aligned_size <= remaining) ? std::max(aligned_size, remaining / 2) : 0;
272  }
273  return std::max(size, pool_size());
274  };
275 
283  thrust::optional<block_type> block_from_upstream(std::size_t size, cuda_stream_view stream)
284  {
285  RMM_LOG_DEBUG("[A][Stream {}][Upstream {}B]", fmt::ptr(stream.value()), size);
286 
287  if (size == 0) { return {}; }
288 
289  try {
290  void* ptr = get_upstream()->allocate(size, stream);
291  return thrust::optional<block_type>{
292  *upstream_blocks_.emplace(static_cast<char*>(ptr), size, true).first};
293  } catch (std::exception const& e) {
294  return thrust::nullopt;
295  }
296  }
297 
309  split_block allocate_from_block(block_type const& block, std::size_t size)
310  {
311  block_type const alloc{block.pointer(), size, block.is_head()};
312 #ifdef RMM_POOL_TRACK_ALLOCATIONS
313  allocated_blocks_.insert(alloc);
314 #endif
315 
316  auto rest = (block.size() > size)
317  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
318  ? block_type{block.pointer() + size, block.size() - size, false}
319  : block_type{};
320  return {alloc, rest};
321  }
322 
  /**
   * @brief Finds, frees and returns the block associated with pointer `ptr`.
   *
   * @param ptr The pointer to the memory to free.
   * @param size The size, in bytes, of the memory to free. Assumed to equal the original
   * allocation size (asserted only when RMM_POOL_TRACK_ALLOCATIONS is defined).
   * @return The (now freed) block associated with `ptr`.
   */
  block_type free_block(void* ptr, std::size_t size) noexcept
  {
#ifdef RMM_POOL_TRACK_ALLOCATIONS
    if (ptr == nullptr) return block_type{};
    auto const iter = allocated_blocks_.find(static_cast<char*>(ptr));
    RMM_LOGGING_ASSERT(iter != allocated_blocks_.end());

    auto block = *iter;
    RMM_LOGGING_ASSERT(block.size() == rmm::detail::align_up(size, allocation_alignment));
    allocated_blocks_.erase(iter);

    return block;
#else
    // The pointer heads an upstream allocation iff it exactly matches a recorded upstream block.
    auto const iter = upstream_blocks_.find(static_cast<char*>(ptr));
    return block_type{static_cast<char*>(ptr), size, (iter != upstream_blocks_.end())};
#endif
  }
349 
354  void release()
355  {
356  lock_guard lock(this->get_mutex());
357 
358  for (auto block : upstream_blocks_) {
359  get_upstream()->deallocate(block.pointer(), block.size());
360  }
361  upstream_blocks_.clear();
362 #ifdef RMM_POOL_TRACK_ALLOCATIONS
363  allocated_blocks_.clear();
364 #endif
365 
366  current_pool_size_ = 0;
367  }
368 
#ifdef RMM_DEBUG_PRINT

  /**
   * @brief Print the device's free/total memory and every block tracked by this resource.
   *
   * Compiled only when RMM_DEBUG_PRINT is defined; intended solely for debugging.
   * Thread-safe: holds the resource mutex for the duration.
   */
  void print()
  {
    lock_guard lock(this->get_mutex());

    auto const [free, total] = upstream_mr_->get_mem_info(rmm::cuda_stream_default);
    std::cout << "GPU free memory: " << free << " total: " << total << "\n";

    std::cout << "upstream_blocks: " << upstream_blocks_.size() << "\n";
    std::size_t upstream_total{0};

    for (auto blocks : upstream_blocks_) {
      blocks.print();
      upstream_total += blocks.size();
    }
    std::cout << "total upstream: " << upstream_total << " B\n";

#ifdef RMM_POOL_TRACK_ALLOCATIONS
    std::cout << "allocated_blocks: " << allocated_blocks_.size() << "\n";
    for (auto block : allocated_blocks_)
      block.print();
#endif

    this->print_free_blocks();
  }
#endif
401 
410  std::pair<std::size_t, std::size_t> free_list_summary(free_list const& blocks)
411  {
412  std::size_t largest{};
413  std::size_t total{};
414  std::for_each(blocks.cbegin(), blocks.cend(), [&largest, &total](auto const& block) {
415  total += block.size();
416  largest = std::max(largest, block.size());
417  });
418  return {largest, total};
419  }
420 
  /**
   * @brief Get free and available memory for the memory resource.
   *
   * @note Not yet implemented: always returns `{0, 0}` regardless of `stream`.
   *
   * @param stream The stream whose memory info is queried (currently ignored).
   * @return std::pair containing free size and total size of memory.
   */
  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
    cuda_stream_view stream) const override
  {
    // TODO implement this
    return {0, 0};
  }
435 
 private:
  Upstream* upstream_mr_;  // The "heap" to allocate the pool from
  std::size_t current_pool_size_{};  // Total bytes obtained from upstream and currently pooled
  thrust::optional<std::size_t> maximum_pool_size_{};  // Optional cap on total pool growth

#ifdef RMM_POOL_TRACK_ALLOCATIONS
  // Outstanding allocations, tracked only when RMM_POOL_TRACK_ALLOCATIONS is defined
  std::set<block_type, rmm::mr::detail::compare_blocks<block_type>> allocated_blocks_;
#endif

  // blocks allocated from upstream
  std::set<block_type, rmm::mr::detail::compare_blocks<block_type>> upstream_blocks_;
};  // class pool_memory_resource
448 
449 } // namespace rmm::mr
rmm::mr::detail::free_list::cend
const_iterator cend() const noexcept
end of the free list
Definition: free_list.hpp:93
rmm::mr::pool_memory_resource::supports_streams
bool supports_streams() const noexcept override
Queries whether the resource supports use of non-null CUDA streams for allocation/deallocation.
Definition: pool_memory_resource.hpp:116
rmm::mr::pool_memory_resource::allocate_from_block
split_block allocate_from_block(block_type const &block, std::size_t size)
Splits block if necessary to return a pointer to memory of size bytes.
Definition: pool_memory_resource.hpp:309
rmm::mr::pool_memory_resource::get_upstream
Upstream * get_upstream() const noexcept
Get the upstream memory_resource object.
Definition: pool_memory_resource.hpp:130
rmm::mr::detail::free_list::cbegin
const_iterator cbegin() const noexcept
beginning of the free list
Definition: free_list.hpp:86
rmm::mr::detail::coalescing_free_list
An ordered list of free memory blocks that coalesces contiguous blocks on insertion.
Definition: coalescing_free_list.hpp:172
rmm::cuda_stream_view
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:34
rmm::mr::pool_memory_resource::block_from_upstream
thrust::optional< block_type > block_from_upstream(std::size_t size, cuda_stream_view stream)
Allocate a block from upstream to expand the suballocation pool.
Definition: pool_memory_resource.hpp:283
rmm::mr::pool_memory_resource::size_to_grow
std::size_t size_to_grow(std::size_t size) const
Given a minimum size, computes an appropriate size to grow the pool.
Definition: pool_memory_resource.hpp:264
rmm::mr::pool_memory_resource::try_to_expand
block_type try_to_expand(std::size_t try_size, std::size_t min_size, cuda_stream_view stream)
Try to expand the pool by allocating a block of at least min_size bytes from upstream.
Definition: pool_memory_resource.hpp:176
rmm::out_of_memory
Exception thrown when RMM runs out of memory.
Definition: error.hpp:68
rmm::mr::pool_memory_resource::pool_size
std::size_t pool_size() const noexcept
Computes the size of the current pool.
Definition: pool_memory_resource.hpp:139
rmm::mr::detail::stream_ordered_memory_resource< pool_memory_resource< Upstream >, detail::coalescing_free_list >::split_block
std::pair< block_type, block_type > split_block
Pair representing a block that has been split for allocation.
Definition: stream_ordered_memory_resource.hpp:120
rmm::mr::pool_memory_resource::free_block
block_type free_block(void *ptr, std::size_t size) noexcept
Finds, frees and returns the block associated with pointer ptr.
Definition: pool_memory_resource.hpp:332
rmm::mr::detail::stream_ordered_memory_resource
Base class for a stream-ordered memory resource.
Definition: stream_ordered_memory_resource.hpp:75
rmm::mr::detail::stream_ordered_memory_resource< pool_memory_resource< Upstream >, detail::coalescing_free_list >::insert_block
void insert_block(block_type const &block, cuda_stream_view stream)
Returns the block b (last used on stream stream_event) to the pool.
Definition: stream_ordered_memory_resource.hpp:152
rmm::mr::pool_memory_resource::expand_pool
block_type expand_pool(std::size_t size, free_list &blocks, cuda_stream_view stream)
Allocate space from upstream to supply the suballocation pool and return a sufficiently sized block.
Definition: pool_memory_resource.hpp:243
rmm::mr::pool_memory_resource::supports_get_mem_info
bool supports_get_mem_info() const noexcept override
Query whether the resource supports the get_mem_info API.
Definition: pool_memory_resource.hpp:123
rmm::mr::detail::stream_ordered_memory_resource< pool_memory_resource< Upstream >, detail::coalescing_free_list >::get_mutex
std::mutex & get_mutex()
Get the mutex object.
Definition: stream_ordered_memory_resource.hpp:181
rmm::mr::pool_memory_resource::free_list_summary
std::pair< std::size_t, std::size_t > free_list_summary(free_list const &blocks)
Get the largest available block size and total free size in the specified free list.
Definition: pool_memory_resource.hpp:410
rmm::cuda_stream_view::value
constexpr cudaStream_t value() const noexcept
Get the wrapped stream.
Definition: cuda_stream_view.hpp:57
rmm::mr::pool_memory_resource::do_get_mem_info
std::pair< std::size_t, std::size_t > do_get_mem_info(cuda_stream_view stream) const override
Get free and available memory for memory resource.
Definition: pool_memory_resource.hpp:429
rmm::mr::pool_memory_resource::initialize_pool
void initialize_pool(thrust::optional< std::size_t > initial_size, thrust::optional< std::size_t > maximum_size)
Allocate initial memory for the pool.
Definition: pool_memory_resource.hpp:208
rmm::mr::pool_memory_resource::~pool_memory_resource
~pool_memory_resource() override
Destroy the pool_memory_resource and deallocate all memory it allocated using the upstream resource.
Definition: pool_memory_resource.hpp:102
rmm::mr::pool_memory_resource
A coalescing best-fit suballocator which uses a pool of memory allocated from an upstream memory_reso...
Definition: pool_memory_resource.hpp:59
rmm::mr::pool_memory_resource::release
void release()
Free all memory allocated from the upstream memory_resource.
Definition: pool_memory_resource.hpp:354
rmm::mr::pool_memory_resource::get_maximum_allocation_size
std::size_t get_maximum_allocation_size() const
Get the maximum size of allocations supported by this memory resource.
Definition: pool_memory_resource.hpp:156