fixed_size_memory_resource.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
7 #include <rmm/aligned.hpp>
9 #include <rmm/detail/error.hpp>
10 #include <rmm/detail/export.hpp>
11 #include <rmm/detail/logging_assert.hpp>
12 #include <rmm/mr/detail/fixed_size_free_list.hpp>
13 #include <rmm/mr/detail/stream_ordered_memory_resource.hpp>
14 #include <rmm/resource_ref.hpp>
15 
16 #include <cuda/iterator>
17 #include <cuda_runtime_api.h>
18 
19 #include <algorithm>
20 #include <cstddef>
21 #include <utility>
22 #include <vector>
23 
24 namespace RMM_NAMESPACE {
25 namespace mr {
37 template <typename Upstream>
39  : public detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
40  detail::fixed_size_free_list> {
 public:
  // The CRTP base needs access to this class's protected hooks (expand_pool,
  // allocate_from_block, free_block, ...).
  friend class detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
                                                      detail::fixed_size_free_list>;

  /// Default size of each block handed out by this resource: 1 MiB.
  static constexpr std::size_t default_block_size = 1 << 20;

  /// Default number of blocks allocated per upstream chunk when the pool is
  /// created or needs to grow.
  static constexpr std::size_t default_blocks_to_preallocate = 128;
62  explicit fixed_size_memory_resource(
63  device_async_resource_ref upstream_mr,
64  // NOLINTNEXTLINE bugprone-easily-swappable-parameters
65  std::size_t block_size = default_block_size,
66  std::size_t blocks_to_preallocate = default_blocks_to_preallocate)
67  : upstream_mr_{upstream_mr},
68  block_size_{align_up(block_size, CUDA_ALLOCATION_ALIGNMENT)},
69  upstream_chunk_size_{block_size_ * blocks_to_preallocate}
70  {
71  // allocate initial blocks and insert into free list
72  this->insert_blocks(std::move(blocks_from_upstream(cuda_stream_legacy)), cuda_stream_legacy);
73  }
74 
86  explicit fixed_size_memory_resource(
87  Upstream* upstream_mr,
88  // NOLINTNEXTLINE bugprone-easily-swappable-parameters
89  std::size_t block_size = default_block_size,
90  std::size_t blocks_to_preallocate = default_blocks_to_preallocate)
91  : upstream_mr_{to_device_async_resource_ref_checked(upstream_mr)},
92  block_size_{align_up(block_size, CUDA_ALLOCATION_ALIGNMENT)},
93  upstream_chunk_size_{block_size_ * blocks_to_preallocate}
94  {
95  // allocate initial blocks and insert into free list
96  this->insert_blocks(std::move(blocks_from_upstream(cuda_stream_legacy)), cuda_stream_legacy);
97  }
98 
103  ~fixed_size_memory_resource() override { release(); }
104 
  // A fixed_size_memory_resource must always be constructed with an upstream
  // resource, so default construction is disabled.
  fixed_size_memory_resource() = delete;
  // Copy assignment is disabled: this resource uniquely owns its upstream chunks.
  // NOTE(review): the copy constructor (and any move members) are declared on
  // lines collapsed out of this view — presumably also deleted; confirm against
  // the full header.
  fixed_size_memory_resource& operator=(fixed_size_memory_resource const&) = delete;
110 
114  [[nodiscard]] device_async_resource_ref get_upstream_resource() const noexcept
115  {
116  return upstream_mr_;
117  }
118 
124  [[nodiscard]] std::size_t get_block_size() const noexcept { return block_size_; }
125 
 protected:
  using free_list  = detail::fixed_size_free_list;  ///< The free list type managed by this resource
  using block_type = free_list::block_type;         ///< The type of block managed by the free list
  // Inherit the base class's {allocated, remainder} pair type for block splits.
  using typename detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
                                                        detail::fixed_size_free_list>::split_block;
  using lock_guard = std::lock_guard<std::mutex>;  ///< Type of lock used to synchronize access
132 
139  [[nodiscard]] std::size_t get_maximum_allocation_size() const { return get_block_size(); }
140 
152  block_type expand_pool(std::size_t size, free_list& blocks, cuda_stream_view stream)
153  {
154  blocks.insert(std::move(blocks_from_upstream(stream)));
155  return blocks.get_block(size);
156  }
157 
165  {
166  void* ptr = get_upstream_resource().allocate(stream, upstream_chunk_size_);
167  block_type block{ptr};
168  upstream_blocks_.push_back(block);
169 
170  auto num_blocks = upstream_chunk_size_ / block_size_;
171 
172  auto block_gen = [ptr, this](int index) {
173  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
174  return block_type{static_cast<char*>(ptr) + index * block_size_};
175  };
176  auto first =
177  cuda::make_transform_iterator(cuda::make_counting_iterator(std::size_t{0}), block_gen);
178  return free_list(first, first + num_blocks);
179  }
180 
191  split_block allocate_from_block(block_type const& block, [[maybe_unused]] std::size_t size)
192  {
193  return {block, block_type{nullptr}};
194  }
195 
204  block_type free_block(void* ptr, [[maybe_unused]] std::size_t size) noexcept
205  {
206  // Deallocating a fixed-size block just inserts it in the free list, which is
207  // handled by the parent class
208  RMM_LOGGING_ASSERT(align_up(size, CUDA_ALLOCATION_ALIGNMENT) <= block_size_);
209  return block_type{ptr};
210  }
211 
216  void release()
217  {
218  lock_guard lock(this->get_mutex());
219 
220  for (auto block : upstream_blocks_) {
221  get_upstream_resource().deallocate_sync(block.pointer(), upstream_chunk_size_);
222  }
223  upstream_blocks_.clear();
224  }
225 
#ifdef RMM_DEBUG_PRINT
  /**
   * @brief Print debugging information about the pool state (debug builds only).
   *
   * Reports overall device memory, the number and contents of upstream chunks,
   * and the state of the free lists. Holds the resource mutex while printing.
   */
  void print()
  {
    lock_guard lock(this->get_mutex());

    auto const [free, total] = rmm::available_device_memory();
    std::cout << "GPU free memory: " << free << " total: " << total << "\n";

    std::cout << "upstream_blocks: " << upstream_blocks_.size() << "\n";
    std::size_t upstream_total{0};

    // Every upstream chunk has the same size, so the total is count * chunk size.
    for (auto blocks : upstream_blocks_) {
      blocks.print();
      upstream_total += upstream_chunk_size_;
    }
    std::cout << "total upstream: " << upstream_total << " B\n";

    this->print_free_blocks();
  }
#endif
246 
255  std::pair<std::size_t, std::size_t> free_list_summary(free_list const& blocks)
256  {
257  return blocks.is_empty() ? std::make_pair(std::size_t{0}, std::size_t{0})
258  : std::make_pair(block_size_, blocks.size() * block_size_);
259  }
260 
 private:
  device_async_resource_ref upstream_mr_;  // The resource from which to allocate new chunks of blocks

  std::size_t block_size_;           // Fixed size (bytes) of blocks this MR allocates
  std::size_t upstream_chunk_size_;  // Size (bytes) of each chunk allocated from the upstream MR

  // Chunks allocated from the upstream resource, retained so release() can free them
  std::vector<block_type> upstream_blocks_;
};
270  // end of group
272 } // namespace mr
273 } // namespace RMM_NAMESPACE
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:28
A device_memory_resource which allocates memory blocks of a single fixed size.
Definition: fixed_size_memory_resource.hpp:40
std::pair< std::size_t, std::size_t > free_list_summary(free_list const &blocks)
Get the largest available block size and total free size in the specified free list.
Definition: fixed_size_memory_resource.hpp:255
detail::fixed_size_free_list free_list
The free list type.
Definition: fixed_size_memory_resource.hpp:127
std::size_t get_block_size() const noexcept
Get the size of blocks allocated by this memory resource.
Definition: fixed_size_memory_resource.hpp:124
std::size_t get_maximum_allocation_size() const
Get the (fixed) size of allocations supported by this memory resource.
Definition: fixed_size_memory_resource.hpp:139
free_list::block_type block_type
The type of block managed by the free list.
Definition: fixed_size_memory_resource.hpp:128
device_async_resource_ref get_upstream_resource() const noexcept
device_async_resource_ref to the upstream resource
Definition: fixed_size_memory_resource.hpp:114
block_type expand_pool(std::size_t size, free_list &blocks, cuda_stream_view stream)
Allocate a block from upstream to supply the suballocation pool.
Definition: fixed_size_memory_resource.hpp:152
free_list blocks_from_upstream(cuda_stream_view stream)
Allocate blocks from upstream to expand the suballocation pool.
Definition: fixed_size_memory_resource.hpp:164
std::lock_guard< std::mutex > lock_guard
Type of lock used to synchronize access.
Definition: fixed_size_memory_resource.hpp:131
block_type free_block(void *ptr, [[maybe_unused]] std::size_t size) noexcept
Finds, frees and returns the block associated with pointer.
Definition: fixed_size_memory_resource.hpp:204
void release()
Free all memory allocated using the upstream resource.
Definition: fixed_size_memory_resource.hpp:216
split_block allocate_from_block(block_type const &block, [[maybe_unused]] std::size_t size)
Splits block if necessary to return a pointer to memory of size bytes.
Definition: fixed_size_memory_resource.hpp:191
std::pair< std::size_t, std::size_t > available_device_memory()
Returns the available and total device memory in bytes for the current device.
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:32
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
Default alignment used for CUDA memory allocation.
Definition: aligned.hpp:25
std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
Align up to nearest multiple of specified power of 2.