fixed_size_memory_resource.hpp
/*
 * Copyright (c) 2020-2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once

#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/aligned.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/detail/fixed_size_free_list.hpp>
#include <rmm/mr/device/detail/stream_ordered_memory_resource.hpp>

#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>

#include <cuda_runtime_api.h>

#include <algorithm>
#include <cstddef>
#include <list>
#include <map>
#include <utility>
#include <vector>

namespace rmm::mr {

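/**
 * @brief A `device_memory_resource` which allocates memory blocks of a single fixed size.
 *
 * Supports only allocations of size no larger than the configured block size.
 *
 * Example usage (a minimal sketch; assumes `rmm::mr::cuda_memory_resource` from
 * `<rmm/mr/device/cuda_memory_resource.hpp>` as the upstream resource):
 *
 * @code{.cpp}
 * rmm::mr::cuda_memory_resource upstream;
 * rmm::mr::fixed_size_memory_resource<rmm::mr::cuda_memory_resource> mr{&upstream};
 * void* ptr = mr.allocate(1024);  // served from a 1 MiB fixed-size block (the default block size)
 * mr.deallocate(ptr, 1024);       // returns the block to the free list
 * @endcode
 *
 * @tparam Upstream The type of the upstream memory resource used for allocating blocks.
 */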
template <typename Upstream>
class fixed_size_memory_resource
  : public detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
                                                   detail::fixed_size_free_list> {
 public:
  friend class detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
                                                       detail::fixed_size_free_list>;

  // A block is the fixed size this resource allocates
  static constexpr std::size_t default_block_size = 1 << 20;  // 1 MiB
  // This is the number of blocks that the pool starts out with, and also the number of
  // blocks by which the pool grows when all of its current blocks are allocated
  static constexpr std::size_t default_blocks_to_preallocate = 128;

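  /**
   * @brief Construct a new `fixed_size_memory_resource` that allocates blocks of `block_size`
   * bytes from `upstream_mr`, preallocating `blocks_to_preallocate` blocks.
   *
   * @param upstream_mr The memory resource from which to allocate blocks for the pool.
   * @param block_size The size of blocks to allocate.
   * @param blocks_to_preallocate The number of blocks to allocate to initialize the pool.
   */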
  explicit fixed_size_memory_resource(
    Upstream* upstream_mr,
    std::size_t block_size            = default_block_size,
    std::size_t blocks_to_preallocate = default_blocks_to_preallocate)
    : upstream_mr_{upstream_mr},
      block_size_{rmm::detail::align_up(block_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT)},
      upstream_chunk_size_{block_size * blocks_to_preallocate}
  {
    // allocate initial blocks and insert into free list
    this->insert_blocks(std::move(blocks_from_upstream(cuda_stream_legacy)), cuda_stream_legacy);
  }

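  /**
   * @brief Destroy the resource, freeing all memory allocated from the upstream resource.
   */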
  ~fixed_size_memory_resource() override { release(); }

  fixed_size_memory_resource()                                  = delete;
  fixed_size_memory_resource(fixed_size_memory_resource const&) = delete;
  fixed_size_memory_resource(fixed_size_memory_resource&&)      = delete;
  fixed_size_memory_resource& operator=(fixed_size_memory_resource const&) = delete;
  fixed_size_memory_resource& operator=(fixed_size_memory_resource&&) = delete;

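  /**
   * @brief Query whether the resource supports use of non-null CUDA streams for
   * allocation/deallocation.
   *
   * @return true
   */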
  [[nodiscard]] bool supports_streams() const noexcept override { return true; }

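  /**
   * @brief Query whether the resource supports the get_mem_info API.
   *
   * @return false
   */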
  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }

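  /**
   * @brief Get the upstream memory resource.
   *
   * @return Upstream* The upstream memory resource.
   */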
  Upstream* get_upstream() const noexcept { return upstream_mr_; }

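  /**
   * @brief Get the size of blocks allocated by this memory resource.
   *
   * @return std::size_t The size, in bytes, of allocated blocks.
   */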
  [[nodiscard]] std::size_t get_block_size() const noexcept { return block_size_; }

 protected:
  using free_list  = detail::fixed_size_free_list;
  using block_type = free_list::block_type;
  using typename detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
                                                         detail::fixed_size_free_list>::split_block;
  using lock_guard = std::lock_guard<std::mutex>;

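  /**
   * @brief Get the (fixed) maximum size of a single allocation supported by this resource.
   *
   * @return std::size_t The block size of this resource.
   */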
  [[nodiscard]] std::size_t get_maximum_allocation_size() const { return get_block_size(); }

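  /**
   * @brief Allocate a chunk of memory from upstream, add its blocks to the free list, and
   * return one block of at least `size` bytes.
   *
   * @param size The minimum size to allocate.
   * @param blocks The free list into which the new blocks are inserted.
   * @param stream The stream on which the memory is to be used.
   * @return block_type A block of at least `size` bytes.
   */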
  block_type expand_pool(std::size_t size, free_list& blocks, cuda_stream_view stream)
  {
    blocks.insert(std::move(blocks_from_upstream(stream)));
    return blocks.get_block(size);
  }

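  /**
   * @brief Allocate a chunk from the upstream resource and split it into fixed-size blocks.
   *
   * @param stream The stream on which the memory is to be used.
   * @return free_list A list of the newly created blocks.
   */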
  free_list blocks_from_upstream(cuda_stream_view stream)
  {
    void* ptr = get_upstream()->allocate(upstream_chunk_size_, stream);
    block_type block{ptr};
    upstream_blocks_.push_back(block);

    auto num_blocks = upstream_chunk_size_ / block_size_;

    auto block_gen = [ptr, this](int index) {
      // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
      return block_type{static_cast<char*>(ptr) + index * block_size_};
    };
    auto first =
      thrust::make_transform_iterator(thrust::make_counting_iterator(std::size_t{0}), block_gen);
    return free_list(first, first + num_blocks);
  }

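  /**
   * @brief Splits `block` if necessary to return a pointer to memory of `size` bytes.
   *
   * Blocks are fixed size, so splitting is never necessary; the whole block is returned with an
   * empty remainder.
   *
   * @param block The block to allocate from.
   * @param size The size in bytes of the requested allocation.
   * @return A pair comprising the allocated block and the (empty) unallocated remainder.
   */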
  split_block allocate_from_block(block_type const& block, std::size_t size)
  {
    return {block, block_type{nullptr}};
  }

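  /**
   * @brief Finds, frees and returns the block associated with `ptr`.
   *
   * @param ptr The pointer to the memory to free.
   * @param size The size of the memory to free. Must be equal to the original allocation size.
   * @return The (now freed) block associated with `ptr`.
   */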
  block_type free_block(void* ptr, std::size_t size) noexcept
  {
    // Deallocating a fixed-size block just inserts it in the free list, which is
    // handled by the parent class
    RMM_LOGGING_ASSERT(rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT) <=
                       block_size_);
    return block_type{ptr};
  }

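  /**
   * @brief Get free and available memory for the memory resource.
   *
   * Not supported by this resource; always returns zeros.
   *
   * @param stream The stream on which to query memory info.
   * @return std::pair containing free and total memory.
   */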
  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
    cuda_stream_view stream) const override
  {
    return std::make_pair(0, 0);
  }

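  /**
   * @brief Free all memory allocated using the upstream resource.
   */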
  void release()
  {
    lock_guard lock(this->get_mutex());

    for (auto block : upstream_blocks_) {
      get_upstream()->deallocate(block.pointer(), upstream_chunk_size_);
    }
    upstream_blocks_.clear();
  }

#ifdef RMM_DEBUG_PRINT
  void print()
  {
    lock_guard lock(this->get_mutex());

    auto const [free, total] = get_upstream()->get_mem_info(rmm::cuda_stream_default);
    std::cout << "GPU free memory: " << free << " total: " << total << "\n";

    std::cout << "upstream_blocks: " << upstream_blocks_.size() << "\n";
    std::size_t upstream_total{0};

    for (auto blocks : upstream_blocks_) {
      blocks.print();
      upstream_total += upstream_chunk_size_;
    }
    std::cout << "total upstream: " << upstream_total << " B\n";

    this->print_free_blocks();
  }
#endif

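  /**
   * @brief Get the largest available block size and total free size in the specified free list.
   *
   * @param blocks The free list to summarize.
   * @return std::pair<std::size_t, std::size_t> Pair of largest available block size and total
   * free size.
   */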
  std::pair<std::size_t, std::size_t> free_list_summary(free_list const& blocks)
  {
    return blocks.is_empty() ? std::make_pair(std::size_t{0}, std::size_t{0})
                             : std::make_pair(block_size_, blocks.size() * block_size_);
  }

 private:
  Upstream* upstream_mr_;  // The resource from which to allocate new blocks

  std::size_t const block_size_;           // size of blocks this MR allocates
  std::size_t const upstream_chunk_size_;  // size of chunks allocated from upstream MR

  // blocks allocated from upstream: stored so they can be easily freed
  std::vector<block_type> upstream_blocks_;
};

}  // namespace rmm::mr