All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
fixed_size_memory_resource.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <rmm/aligned.hpp>
19 #include <rmm/cuda_stream_view.hpp>
20 #include <rmm/detail/error.hpp>
21 #include <rmm/detail/export.hpp>
22 #include <rmm/detail/logging_assert.hpp>
23 #include <rmm/detail/thrust_namespace.h>
24 #include <rmm/mr/device/detail/fixed_size_free_list.hpp>
25 #include <rmm/mr/device/detail/stream_ordered_memory_resource.hpp>
26 #include <rmm/resource_ref.hpp>
27 
28 #include <cuda_runtime_api.h>
29 #include <thrust/iterator/counting_iterator.h>
30 #include <thrust/iterator/transform_iterator.h>
31 
32 #include <algorithm>
33 #include <cstddef>
34 #include <list>
35 #include <map>
36 #include <utility>
37 #include <vector>
38 
39 namespace RMM_NAMESPACE {
40 namespace mr {
52 template <typename Upstream>
54  : public detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
55  detail::fixed_size_free_list> {
56  public:
57  friend class detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
58  detail::fixed_size_free_list>;
59 
60  static constexpr std::size_t default_block_size = 1 << 20;
61 
64  static constexpr std::size_t default_blocks_to_preallocate = 128;
65 
77  explicit fixed_size_memory_resource(
78  device_async_resource_ref upstream_mr,
79  // NOLINTNEXTLINE bugprone-easily-swappable-parameters
80  std::size_t block_size = default_block_size,
81  std::size_t blocks_to_preallocate = default_blocks_to_preallocate)
82  : upstream_mr_{upstream_mr},
83  block_size_{align_up(block_size, CUDA_ALLOCATION_ALIGNMENT)},
84  upstream_chunk_size_{block_size_ * blocks_to_preallocate}
85  {
86  // allocate initial blocks and insert into free list
87  this->insert_blocks(std::move(blocks_from_upstream(cuda_stream_legacy)), cuda_stream_legacy);
88  }
89 
101  explicit fixed_size_memory_resource(
102  Upstream* upstream_mr,
103  // NOLINTNEXTLINE bugprone-easily-swappable-parameters
104  std::size_t block_size = default_block_size,
105  std::size_t blocks_to_preallocate = default_blocks_to_preallocate)
106  : upstream_mr_{to_device_async_resource_ref_checked(upstream_mr)},
107  block_size_{align_up(block_size, CUDA_ALLOCATION_ALIGNMENT)},
108  upstream_chunk_size_{block_size_ * blocks_to_preallocate}
109  {
110  // allocate initial blocks and insert into free list
111  this->insert_blocks(std::move(blocks_from_upstream(cuda_stream_legacy)), cuda_stream_legacy);
112  }
113 
  /**
   * @brief Destroy the resource, returning all allocated memory to the upstream resource.
   */
  ~fixed_size_memory_resource() override { release(); }

  // Not default-constructible; copy assignment is disallowed.
  // NOTE(review): the listing omits original lines 121-122 here — the copy constructor is
  // presumably deleted as well; confirm against the original header.
  fixed_size_memory_resource() = delete;
  fixed_size_memory_resource& operator=(fixed_size_memory_resource const&) = delete;
  /**
   * @brief Get a device_async_resource_ref to the upstream resource.
   *
   * @return device_async_resource_ref The resource from which new chunks are allocated.
   */
  [[nodiscard]] device_async_resource_ref get_upstream_resource() const noexcept
  {
    return upstream_mr_;
  }

  /**
   * @brief Get the fixed size (in bytes) of blocks allocated by this resource.
   *
   * @return std::size_t The block size, already aligned up to CUDA_ALLOCATION_ALIGNMENT.
   */
  [[nodiscard]] std::size_t get_block_size() const noexcept { return block_size_; }
140 
 protected:
  /// The free-list type used to track available fixed-size blocks.
  using free_list = detail::fixed_size_free_list;
  /// The type of block managed by the free list.
  using block_type = free_list::block_type;
  /// Result type of allocate_from_block, inherited from the CRTP base class.
  using typename detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
                                                        detail::fixed_size_free_list>::split_block;
  /// Lock type used to synchronize access to internal state.
  using lock_guard = std::lock_guard<std::mutex>;

  /**
   * @brief Get the (fixed) maximum allocation size supported by this resource.
   *
   * Every block has the same size, so no allocation larger than a single block can be
   * satisfied.
   *
   * @return std::size_t The fixed block size.
   */
  [[nodiscard]] std::size_t get_maximum_allocation_size() const { return get_block_size(); }
155 
167  block_type expand_pool(std::size_t size, free_list& blocks, cuda_stream_view stream)
168  {
169  blocks.insert(std::move(blocks_from_upstream(stream)));
170  return blocks.get_block(size);
171  }
172 
180  {
181  void* ptr = get_upstream_resource().allocate_async(upstream_chunk_size_, stream);
182  block_type block{ptr};
183  upstream_blocks_.push_back(block);
184 
185  auto num_blocks = upstream_chunk_size_ / block_size_;
186 
187  auto block_gen = [ptr, this](int index) {
188  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
189  return block_type{static_cast<char*>(ptr) + index * block_size_};
190  };
191  auto first =
192  thrust::make_transform_iterator(thrust::make_counting_iterator(std::size_t{0}), block_gen);
193  return free_list(first, first + num_blocks);
194  }
195 
  /**
   * @brief Return the block to use to satisfy an allocation of `size` bytes.
   *
   * All blocks are the same size, so the block is returned whole; the "remainder" of the
   * split is always an empty (nullptr) block.
   *
   * @param block The block to allocate from.
   * @param size The requested allocation size (unused: fixed-size blocks are never split).
   * @return split_block The given block paired with a nullptr remainder.
   */
  split_block allocate_from_block(block_type const& block, std::size_t size)
  {
    return {block, block_type{nullptr}};
  }
210 
  /**
   * @brief Convert a pointer being deallocated back into a block for the free list.
   *
   * No coalescing or size bookkeeping is needed: reinsertion into the free list is handled
   * by the parent class.
   *
   * @param ptr The pointer being freed.
   * @param size The size of the allocation (used only to assert it fits in one block).
   * @return block_type A block wrapping `ptr`.
   */
  block_type free_block(void* ptr, std::size_t size) noexcept
  {
    // Deallocating a fixed-size block just inserts it in the free list, which is
    // handled by the parent class
    RMM_LOGGING_ASSERT(align_up(size, CUDA_ALLOCATION_ALIGNMENT) <= block_size_);
    return block_type{ptr};
  }
226 
231  void release()
232  {
233  lock_guard lock(this->get_mutex());
234 
235  for (auto block : upstream_blocks_) {
236  get_upstream_resource().deallocate(block.pointer(), upstream_chunk_size_);
237  }
238  upstream_blocks_.clear();
239  }
240 
#ifdef RMM_DEBUG_PRINT
  /**
   * @brief Print diagnostic information about this resource to stdout (debug builds only).
   *
   * Reports GPU free/total memory, each chunk obtained from upstream, and the free lists.
   */
  void print()
  {
    lock_guard lock(this->get_mutex());

    auto const [free, total] = rmm::available_device_memory();
    std::cout << "GPU free memory: " << free << " total: " << total << "\n";

    std::cout << "upstream_blocks: " << upstream_blocks_.size() << "\n";
    std::size_t upstream_total{0};

    for (auto blocks : upstream_blocks_) {
      blocks.print();
      // Each recorded chunk has size upstream_chunk_size_.
      upstream_total += upstream_chunk_size_;
    }
    std::cout << "total upstream: " << upstream_total << " B\n";

    this->print_free_blocks();
  }
#endif
261 
270  std::pair<std::size_t, std::size_t> free_list_summary(free_list const& blocks)
271  {
272  return blocks.is_empty() ? std::make_pair(std::size_t{0}, std::size_t{0})
273  : std::make_pair(block_size_, blocks.size() * block_size_);
274  }
275 
 private:
  device_async_resource_ref upstream_mr_;  // The resource from which new chunks are allocated

  std::size_t block_size_;           // Fixed size (bytes) of blocks this MR allocates
  std::size_t upstream_chunk_size_;  // Size (bytes) of each chunk allocated from upstream

  // Chunks allocated from upstream, recorded so they can be freed in release()
  std::vector<block_type> upstream_blocks_;
284 };
285  // end of group
287 } // namespace mr
288 } // namespace RMM_NAMESPACE
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:41
A device_memory_resource which allocates memory blocks of a single fixed size.
Definition: fixed_size_memory_resource.hpp:55
std::pair< std::size_t, std::size_t > free_list_summary(free_list const &blocks)
Get the largest available block size and total free size in the specified free list.
Definition: fixed_size_memory_resource.hpp:270
detail::fixed_size_free_list free_list
The free list type.
Definition: fixed_size_memory_resource.hpp:142
block_type free_block(void *ptr, std::size_t size) noexcept
Finds, frees and returns the block associated with pointer.
Definition: fixed_size_memory_resource.hpp:219
std::size_t get_block_size() const noexcept
Get the size of blocks allocated by this memory resource.
Definition: fixed_size_memory_resource.hpp:139
std::size_t get_maximum_allocation_size() const
Get the (fixed) size of allocations supported by this memory resource.
Definition: fixed_size_memory_resource.hpp:154
free_list::block_type block_type
The type of block managed by the free list.
Definition: fixed_size_memory_resource.hpp:143
device_async_resource_ref get_upstream_resource() const noexcept
Returns a device_async_resource_ref to the upstream resource.
Definition: fixed_size_memory_resource.hpp:129
block_type expand_pool(std::size_t size, free_list &blocks, cuda_stream_view stream)
Allocate a block from upstream to supply the suballocation pool.
Definition: fixed_size_memory_resource.hpp:167
free_list blocks_from_upstream(cuda_stream_view stream)
Allocate blocks from upstream to expand the suballocation pool.
Definition: fixed_size_memory_resource.hpp:179
std::lock_guard< std::mutex > lock_guard
Type of lock used to synchronize access.
Definition: fixed_size_memory_resource.hpp:146
void release()
Free all memory allocated using the upstream resource.
Definition: fixed_size_memory_resource.hpp:231
split_block allocate_from_block(block_type const &block, std::size_t size)
Splits block if necessary to return a pointer to memory of size bytes.
Definition: fixed_size_memory_resource.hpp:206
std::pair< std::size_t, std::size_t > available_device_memory()
Returns the available and total device memory in bytes for the current device.
Definition: cuda_device.hpp:120
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:41
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
Default alignment used for CUDA memory allocation.
Definition: aligned.hpp:43
constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
Align up to nearest multiple of specified power of 2.
Definition: aligned.hpp:77