pool_memory_resource.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <rmm/cuda_stream_view.hpp>
19 #include <rmm/detail/aligned.hpp>
20 #include <rmm/detail/cuda_util.hpp>
21 #include <rmm/detail/error.hpp>
22 #include <rmm/detail/logging_assert.hpp>
23 #include <rmm/logger.hpp>
24 #include <rmm/mr/device/detail/coalescing_free_list.hpp>
25 #include <rmm/mr/device/detail/stream_ordered_memory_resource.hpp>
27 
28 #include <rmm/detail/thrust_namespace.h>
29 #include <thrust/iterator/counting_iterator.h>
30 #include <thrust/iterator/transform_iterator.h>
31 #include <thrust/optional.h>
32 
33 #include <fmt/core.h>
34 
35 #include <cuda_runtime_api.h>
36 
37 #include <algorithm>
38 #include <cstddef>
39 #include <iostream>
40 #include <map>
41 #include <mutex>
42 #include <numeric>
43 #include <set>
44 #include <thread>
45 #include <unordered_map>
46 #include <vector>
47 
48 namespace rmm::mr {
54 namespace detail {
66 template <class PoolResource, class Upstream, class Property, class = void>
68 
72 template <class PoolResource, class Upstream, class Property>
73 struct maybe_remove_property<PoolResource,
74  Upstream,
75  Property,
76  cuda::std::enable_if_t<!cuda::has_property<Upstream, Property>>> {
77 #ifdef __GNUC__ // GCC warns about compatibility issues with pre ISO C++ code
78 #pragma GCC diagnostic push
79 #pragma GCC diagnostic ignored "-Wnon-template-friend"
80 #endif // __GNUC__
85  friend void get_property(const PoolResource&, Property) = delete;
86 #ifdef __GNUC__
87 #pragma GCC diagnostic pop
88 #endif // __GNUC__
89 };
90 } // namespace detail
91 
102 template <typename Upstream>
104  : public detail::
105  maybe_remove_property<pool_memory_resource<Upstream>, Upstream, cuda::mr::device_accessible>,
106  public detail::stream_ordered_memory_resource<pool_memory_resource<Upstream>,
107  detail::coalescing_free_list>,
108  public cuda::forward_property<pool_memory_resource<Upstream>, Upstream> {
109  public:
110  friend class detail::stream_ordered_memory_resource<pool_memory_resource<Upstream>,
111  detail::coalescing_free_list>;
112 
129  explicit pool_memory_resource(Upstream* upstream_mr,
130  thrust::optional<std::size_t> initial_pool_size = thrust::nullopt,
131  thrust::optional<std::size_t> maximum_pool_size = thrust::nullopt)
132  : upstream_mr_{[upstream_mr]() {
133  RMM_EXPECTS(nullptr != upstream_mr, "Unexpected null upstream pointer.");
134  return upstream_mr;
135  }()}
136  {
137  RMM_EXPECTS(rmm::detail::is_aligned(initial_pool_size.value_or(0),
138  rmm::detail::CUDA_ALLOCATION_ALIGNMENT),
139  "Error, Initial pool size required to be a multiple of 256 bytes");
140  RMM_EXPECTS(rmm::detail::is_aligned(maximum_pool_size.value_or(0),
141  rmm::detail::CUDA_ALLOCATION_ALIGNMENT),
142  "Error, Maximum pool size required to be a multiple of 256 bytes");
143 
144  initialize_pool(initial_pool_size, maximum_pool_size);
145  }
146 
163  template <typename Upstream2 = Upstream,
164  cuda::std::enable_if_t<cuda::mr::async_resource<Upstream2>, int> = 0>
165  explicit pool_memory_resource(Upstream2& upstream_mr,
166  thrust::optional<std::size_t> initial_pool_size = thrust::nullopt,
167  thrust::optional<std::size_t> maximum_pool_size = thrust::nullopt)
168  : pool_memory_resource(cuda::std::addressof(upstream_mr), initial_pool_size, maximum_pool_size)
169  {
170  }
171 
176  ~pool_memory_resource() override { release(); }
177 
178  pool_memory_resource() = delete;
181  pool_memory_resource& operator=(pool_memory_resource const&) = delete;
182  pool_memory_resource& operator=(pool_memory_resource&&) = delete;
183 
190  [[nodiscard]] bool supports_streams() const noexcept override { return true; }
191 
197  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }
198 
204  [[nodiscard]] const Upstream& upstream_resource() const noexcept { return *upstream_mr_; }
205 
211  Upstream* get_upstream() const noexcept { return upstream_mr_; }
212 
220  [[nodiscard]] std::size_t pool_size() const noexcept { return current_pool_size_; }
221 
222  protected:
223  using free_list = detail::coalescing_free_list;
224  using block_type = free_list::block_type;
225  using typename detail::stream_ordered_memory_resource<pool_memory_resource<Upstream>,
226  detail::coalescing_free_list>::split_block;
227  using lock_guard = std::lock_guard<std::mutex>;
228 
237  [[nodiscard]] std::size_t get_maximum_allocation_size() const
238  {
239  return std::numeric_limits<std::size_t>::max();
240  }
241 
257  block_type try_to_expand(std::size_t try_size, std::size_t min_size, cuda_stream_view stream)
258  {
259  while (try_size >= min_size) {
260  auto block = block_from_upstream(try_size, stream);
261  if (block.has_value()) {
262  current_pool_size_ += block.value().size();
263  return block.value();
264  }
265  if (try_size == min_size) {
266  break; // only try `size` once
267  }
268  try_size = std::max(min_size, try_size / 2);
269  }
270  RMM_LOG_ERROR("[A][Stream {}][Upstream {}B][FAILURE maximum pool size exceeded]",
271  fmt::ptr(stream.value()),
272  min_size);
273  RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory);
274  }
275 
288  // NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
289  void initialize_pool(thrust::optional<std::size_t> initial_size,
290  thrust::optional<std::size_t> maximum_size)
291  {
292  auto const try_size = [&]() {
293  if (not initial_size.has_value()) {
294  auto const [free, total] = (get_upstream()->supports_get_mem_info())
295  ? get_upstream()->get_mem_info(cuda_stream_legacy)
296  : rmm::detail::available_device_memory();
297  return rmm::detail::align_up(std::min(free, total / 2),
298  rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
299  }
300  return initial_size.value();
301  }();
302 
303  current_pool_size_ = 0; // try_to_expand will set this if it succeeds
304  maximum_pool_size_ = maximum_size;
305 
306  RMM_EXPECTS(try_size <= maximum_pool_size_.value_or(std::numeric_limits<std::size_t>::max()),
307  "Initial pool size exceeds the maximum pool size!");
308 
309  if (try_size > 0) {
310  auto const block = try_to_expand(try_size, try_size, cuda_stream_legacy);
311  this->insert_block(block, cuda_stream_legacy);
312  }
313  }
314 
324  block_type expand_pool(std::size_t size, free_list& blocks, cuda_stream_view stream)
325  {
326  // Strategy: If maximum_pool_size_ is set, then grow geometrically, e.g. by halfway to the
327  // limit each time. If it is not set, grow exponentially, e.g. by doubling the pool size each
328  // time. Upon failure, attempt to back off exponentially, e.g. by half the attempted size,
329  // until either success or the attempt is less than the requested size.
330  return try_to_expand(size_to_grow(size), size, stream);
331  }
332 
345  [[nodiscard]] std::size_t size_to_grow(std::size_t size) const
346  {
347  if (maximum_pool_size_.has_value()) {
348  auto const unaligned_remaining = maximum_pool_size_.value() - pool_size();
349  using rmm::detail::align_up;
350  auto const remaining = align_up(unaligned_remaining, rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
351  auto const aligned_size = align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
352  return (aligned_size <= remaining) ? std::max(aligned_size, remaining / 2) : 0;
353  }
354  return std::max(size, pool_size());
355  };
356 
364  thrust::optional<block_type> block_from_upstream(std::size_t size, cuda_stream_view stream)
365  {
366  RMM_LOG_DEBUG("[A][Stream {}][Upstream {}B]", fmt::ptr(stream.value()), size);
367 
368  if (size == 0) { return {}; }
369 
370  try {
371  void* ptr = get_upstream()->allocate_async(size, stream);
372  return thrust::optional<block_type>{
373  *upstream_blocks_.emplace(static_cast<char*>(ptr), size, true).first};
374  } catch (std::exception const& e) {
375  return thrust::nullopt;
376  }
377  }
378 
389  split_block allocate_from_block(block_type const& block, std::size_t size)
390  {
391  block_type const alloc{block.pointer(), size, block.is_head()};
392 #ifdef RMM_POOL_TRACK_ALLOCATIONS
393  allocated_blocks_.insert(alloc);
394 #endif
395 
396  auto rest = (block.size() > size)
397  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
398  ? block_type{block.pointer() + size, block.size() - size, false}
399  : block_type{};
400  return {alloc, rest};
401  }
402 
411  block_type free_block(void* ptr, std::size_t size) noexcept
412  {
413 #ifdef RMM_POOL_TRACK_ALLOCATIONS
414  if (ptr == nullptr) return block_type{};
415  auto const iter = allocated_blocks_.find(static_cast<char*>(ptr));
416  RMM_LOGGING_ASSERT(iter != allocated_blocks_.end());
417 
418  auto block = *iter;
419  RMM_LOGGING_ASSERT(block.size() == rmm::detail::align_up(size, allocation_alignment));
420  allocated_blocks_.erase(iter);
421 
422  return block;
423 #else
424  auto const iter = upstream_blocks_.find(static_cast<char*>(ptr));
425  return block_type{static_cast<char*>(ptr), size, (iter != upstream_blocks_.end())};
426 #endif
427  }
428 
433  void release()
434  {
435  lock_guard lock(this->get_mutex());
436 
437  for (auto block : upstream_blocks_) {
438  get_upstream()->deallocate(block.pointer(), block.size());
439  }
440  upstream_blocks_.clear();
441 #ifdef RMM_POOL_TRACK_ALLOCATIONS
442  allocated_blocks_.clear();
443 #endif
444 
445  current_pool_size_ = 0;
446  }
447 
448 #ifdef RMM_DEBUG_PRINT
455  void print()
456  {
457  lock_guard lock(this->get_mutex());
458 
459  auto const [free, total] = upstream_mr_->get_mem_info(rmm::cuda_stream_default);
460  std::cout << "GPU free memory: " << free << " total: " << total << "\n";
461 
462  std::cout << "upstream_blocks: " << upstream_blocks_.size() << "\n";
463  std::size_t upstream_total{0};
464 
465  for (auto blocks : upstream_blocks_) {
466  blocks.print();
467  upstream_total += blocks.size();
468  }
469  std::cout << "total upstream: " << upstream_total << " B\n";
470 
471 #ifdef RMM_POOL_TRACK_ALLOCATIONS
472  std::cout << "allocated_blocks: " << allocated_blocks_.size() << "\n";
473  for (auto block : allocated_blocks_)
474  block.print();
475 #endif
476 
477  this->print_free_blocks();
478  }
479 #endif
480 
489  std::pair<std::size_t, std::size_t> free_list_summary(free_list const& blocks)
490  {
491  std::size_t largest{};
492  std::size_t total{};
493  std::for_each(blocks.cbegin(), blocks.cend(), [&largest, &total](auto const& block) {
494  total += block.size();
495  largest = std::max(largest, block.size());
496  });
497  return {largest, total};
498  }
499 
508  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
509  cuda_stream_view stream) const override
510  {
511  // TODO implement this
512  return {0, 0};
513  }
514 
515  private:
516  Upstream* upstream_mr_; // The "heap" to allocate the pool from
517  std::size_t current_pool_size_{};
518  thrust::optional<std::size_t> maximum_pool_size_{};
519 
520 #ifdef RMM_POOL_TRACK_ALLOCATIONS
521  std::set<block_type, rmm::mr::detail::compare_blocks<block_type>> allocated_blocks_;
522 #endif
523 
524  // blocks allocated from upstream
525  std::set<block_type, rmm::mr::detail::compare_blocks<block_type>> upstream_blocks_;
526 }; // namespace mr
527  // end of group
529 } // namespace rmm::mr
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:41
constexpr cudaStream_t value() const noexcept
Get the wrapped stream.
Definition: cuda_stream_view.hpp:75
A coalescing best-fit suballocator which uses a pool of memory allocated from an upstream memory_reso...
Definition: pool_memory_resource.hpp:108
void initialize_pool(thrust::optional< std::size_t > initial_size, thrust::optional< std::size_t > maximum_size)
Allocate initial memory for the pool.
Definition: pool_memory_resource.hpp:289
block_type free_block(void *ptr, std::size_t size) noexcept
Finds, frees and returns the block associated with pointer ptr.
Definition: pool_memory_resource.hpp:411
split_block allocate_from_block(block_type const &block, std::size_t size)
Splits block if necessary to return a pointer to memory of size bytes.
Definition: pool_memory_resource.hpp:389
std::size_t size_to_grow(std::size_t size) const
Given a minimum size, computes an appropriate size to grow the pool.
Definition: pool_memory_resource.hpp:345
bool supports_streams() const noexcept override
Queries whether the resource supports use of non-null CUDA streams for allocation/deallocation.
Definition: pool_memory_resource.hpp:190
const Upstream & upstream_resource() const noexcept
Get the upstream memory_resource object.
Definition: pool_memory_resource.hpp:204
thrust::optional< block_type > block_from_upstream(std::size_t size, cuda_stream_view stream)
Allocate a block from upstream to expand the suballocation pool.
Definition: pool_memory_resource.hpp:364
free_list::block_type block_type
The type of block returned by the free list.
Definition: pool_memory_resource.hpp:224
std::pair< std::size_t, std::size_t > free_list_summary(free_list const &blocks)
Get the largest available block size and total free size in the specified free list.
Definition: pool_memory_resource.hpp:489
std::pair< std::size_t, std::size_t > do_get_mem_info(cuda_stream_view stream) const override
Get free and available memory for memory resource.
Definition: pool_memory_resource.hpp:508
std::size_t get_maximum_allocation_size() const
Get the maximum size of allocations supported by this memory resource.
Definition: pool_memory_resource.hpp:237
block_type expand_pool(std::size_t size, free_list &blocks, cuda_stream_view stream)
Allocate space from upstream to supply the suballocation pool and return a sufficiently sized block.
Definition: pool_memory_resource.hpp:324
bool supports_get_mem_info() const noexcept override
Query whether the resource supports the get_mem_info API.
Definition: pool_memory_resource.hpp:197
Upstream * get_upstream() const noexcept
Get the upstream memory_resource object.
Definition: pool_memory_resource.hpp:211
void release()
Free all memory allocated from the upstream memory_resource.
Definition: pool_memory_resource.hpp:433
block_type try_to_expand(std::size_t try_size, std::size_t min_size, cuda_stream_view stream)
Try to expand the pool by allocating a block of at least min_size bytes from upstream.
Definition: pool_memory_resource.hpp:257
std::lock_guard< std::mutex > lock_guard
Type of lock used to synchronize access.
Definition: pool_memory_resource.hpp:227
std::size_t pool_size() const noexcept
Computes the size of the current pool.
Definition: pool_memory_resource.hpp:220
pool_memory_resource(Upstream2 &upstream_mr, thrust::optional< std::size_t > initial_pool_size=thrust::nullopt, thrust::optional< std::size_t > maximum_pool_size=thrust::nullopt)
Construct a pool_memory_resource and allocate the initial device memory pool using upstream_mr.
Definition: pool_memory_resource.hpp:165
~pool_memory_resource() override
Destroy the pool_memory_resource and deallocate all memory it allocated using the upstream resource.
Definition: pool_memory_resource.hpp:176
detail::coalescing_free_list free_list
The free list implementation.
Definition: pool_memory_resource.hpp:223
Exception thrown when RMM runs out of memory.
Definition: error.hpp:89
A helper class to remove the device_accessible property.
Definition: pool_memory_resource.hpp:67