// arena.hpp
1 /*
2  * Copyright (c) 2019-2022, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#pragma once

#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/aligned.hpp>
#include <rmm/detail/cuda_util.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/logger.hpp>

#include <cuda_runtime_api.h>

#include <spdlog/common.h>
#include <spdlog/fmt/ostr.h>

#include <algorithm>
#include <array>
#include <cstddef>
#include <limits>
#include <memory>
#include <mutex>
#include <numeric>
#include <optional>
#include <set>
#include <utility>
38 
39 namespace rmm::mr::detail::arena {
40 
47 inline std::size_t align_to_size_class(std::size_t value) noexcept
48 {
49  // See http://jemalloc.net/jemalloc.3.html.
50  // NOLINTBEGIN(readability-magic-numbers,cppcoreguidelines-avoid-magic-numbers)
51  static std::array<std::size_t, 117> size_classes{
52  // clang-format off
53  // Spacing 256:
54  256UL, 512UL, 768UL, 1024UL, 1280UL, 1536UL, 1792UL, 2048UL,
55  // Spacing 512:
56  2560UL, 3072UL, 3584UL, 4096UL,
57  // Spacing 1 KiB:
58  5UL << 10, 6UL << 10, 7UL << 10, 8UL << 10,
59  // Spacing 2 KiB:
60  10UL << 10, 12UL << 10, 14UL << 10, 16UL << 10,
61  // Spacing 4 KiB:
62  20UL << 10, 24UL << 10, 28UL << 10, 32UL << 10,
63  // Spacing 8 KiB:
64  40UL << 10, 48UL << 10, 54UL << 10, 64UL << 10,
65  // Spacing 16 KiB:
66  80UL << 10, 96UL << 10, 112UL << 10, 128UL << 10,
67  // Spacing 32 KiB:
68  160UL << 10, 192UL << 10, 224UL << 10, 256UL << 10,
69  // Spacing 64 KiB:
70  320UL << 10, 384UL << 10, 448UL << 10, 512UL << 10,
71  // Spacing 128 KiB:
72  640UL << 10, 768UL << 10, 896UL << 10, 1UL << 20,
73  // Spacing 256 KiB:
74  1280UL << 10, 1536UL << 10, 1792UL << 10, 2UL << 20,
75  // Spacing 512 KiB:
76  2560UL << 10, 3UL << 20, 3584UL << 10, 4UL << 20,
77  // Spacing 1 MiB:
78  5UL << 20, 6UL << 20, 7UL << 20, 8UL << 20,
79  // Spacing 2 MiB:
80  10UL << 20, 12UL << 20, 14UL << 20, 16UL << 20,
81  // Spacing 4 MiB:
82  20UL << 20, 24UL << 20, 28UL << 20, 32UL << 20,
83  // Spacing 8 MiB:
84  40UL << 20, 48UL << 20, 56UL << 20, 64UL << 20,
85  // Spacing 16 MiB:
86  80UL << 20, 96UL << 20, 112UL << 20, 128UL << 20,
87  // Spacing 32 MiB:
88  160UL << 20, 192UL << 20, 224UL << 20, 256UL << 20,
89  // Spacing 64 MiB:
90  320UL << 20, 384UL << 20, 448UL << 20, 512UL << 20,
91  // Spacing 128 MiB:
92  640UL << 20, 768UL << 20, 896UL << 20, 1UL << 30,
93  // Spacing 256 MiB:
94  1280UL << 20, 1536UL << 20, 1792UL << 20, 2UL << 30,
95  // Spacing 512 MiB:
96  2560UL << 20, 3UL << 30, 3584UL << 20, 4UL << 30,
97  // Spacing 1 GiB:
98  5UL << 30, 6UL << 30, 7UL << 30, 8UL << 30,
99  // Spacing 2 GiB:
100  10UL << 30, 12UL << 30, 14UL << 30, 16UL << 30,
101  // Spacing 4 GiB:
102  20UL << 30, 24UL << 30, 28UL << 30, 32UL << 30,
103  // Spacing 8 GiB:
104  40UL << 30, 48UL << 30, 56UL << 30, 64UL << 30,
105  // Spacing 16 GiB:
106  80UL << 30, 96UL << 30, 112UL << 30, 128UL << 30,
107  // Spacing 32 Gib:
108  160UL << 30, 192UL << 30, 224UL << 30, 256UL << 30,
109  // Catch all:
110  std::numeric_limits<std::size_t>::max()
111  // clang-format on
112  };
113  // NOLINTEND(readability-magic-numbers,cppcoreguidelines-avoid-magic-numbers)
114 
115  auto* bound = std::lower_bound(size_classes.begin(), size_classes.end(), value);
116  RMM_LOGGING_ASSERT(bound != size_classes.end());
117  return *bound;
118 }
119 
123 class byte_span {
124  public:
128  byte_span() = default;
129 
136  byte_span(void* pointer, std::size_t size) : pointer_{static_cast<char*>(pointer)}, size_{size}
137  {
138  RMM_LOGGING_ASSERT(pointer != nullptr);
139  RMM_LOGGING_ASSERT(size > 0);
140  }
141 
143  [[nodiscard]] char* pointer() const { return pointer_; }
144 
146  [[nodiscard]] std::size_t size() const { return size_; }
147 
149  [[nodiscard]] char* end() const
150  {
151  return pointer_ + size_; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
152  }
153 
155  [[nodiscard]] bool is_valid() const { return pointer_ != nullptr && size_ > 0; }
156 
158  bool operator<(byte_span const& span) const
159  {
160  RMM_LOGGING_ASSERT(span.is_valid());
161  return pointer_ < span.pointer_;
162  }
163 
164  private:
165  char* pointer_{};
166  std::size_t size_{};
167 };
168 
/**
 * @brief Calculate the total size of a set of spans.
 *
 * @tparam T Span type; must provide a `size()` member returning a byte count.
 * @param spans The spans to sum over.
 * @return The sum of `size()` over all spans, in bytes.
 */
template <typename T>
inline auto total_memory_size(std::set<T> const& spans)
{
  std::size_t total{};
  for (auto const& span : spans) {
    total += span.size();
  }
  return total;
}
178 
182 class block final : public byte_span {
183  public:
184  using byte_span::byte_span;
185 
192  [[nodiscard]] bool fits(std::size_t bytes) const
193  {
194  RMM_LOGGING_ASSERT(is_valid());
195  RMM_LOGGING_ASSERT(bytes > 0);
196  return size() >= bytes;
197  }
198 
205  [[nodiscard]] bool is_contiguous_before(block const& blk) const
206  {
207  RMM_LOGGING_ASSERT(is_valid());
208  RMM_LOGGING_ASSERT(blk.is_valid());
209  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
210  return pointer() + size() == blk.pointer();
211  }
212 
219  [[nodiscard]] std::pair<block, block> split(std::size_t bytes) const
220  {
221  RMM_LOGGING_ASSERT(is_valid());
222  RMM_LOGGING_ASSERT(size() > bytes);
223  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
224  return {{pointer(), bytes}, {pointer() + bytes, size() - bytes}};
225  }
226 
235  [[nodiscard]] block merge(block const& blk) const
236  {
237  RMM_LOGGING_ASSERT(is_contiguous_before(blk));
238  return {pointer(), size() + blk.size()};
239  }
240 };
241 
243 inline bool block_size_compare(block const& lhs, block const& rhs)
244 {
245  RMM_LOGGING_ASSERT(lhs.is_valid());
246  RMM_LOGGING_ASSERT(rhs.is_valid());
247  return lhs.size() < rhs.size();
248 }
249 
/**
 * @brief Represents a large chunk of memory that is exchanged between the global arena and
 * per-thread arenas. Maintains its own address-ordered free list of blocks.
 */
class superblock final : public byte_span {
 public:
  /// Minimum size of a superblock (1 MiB).
  static constexpr std::size_t minimum_size{1UL << 20};
  /// Maximum size of a superblock (1 TiB), as a sanity check.
  static constexpr std::size_t maximum_size{1UL << 40};

  /// Construct a default (invalid) superblock.
  superblock() = default;

  /**
   * @brief Construct a superblock given a pointer and size.
   *
   * The whole superblock initially consists of a single free block.
   *
   * @param pointer The beginning of the superblock.
   * @param size The size of the superblock in bytes (within [minimum_size, maximum_size]).
   */
  superblock(void* pointer, std::size_t size) : byte_span{pointer, size}
  {
    RMM_LOGGING_ASSERT(size >= minimum_size);
    RMM_LOGGING_ASSERT(size <= maximum_size);
    free_blocks_.emplace(pointer, size);
  }

  // Disable copy semantics.
  superblock(superblock const&) = delete;
  superblock& operator=(superblock const&) = delete;
  // Allow move semantics.
  superblock(superblock&&) noexcept = default;
  superblock& operator=(superblock&&) noexcept = default;

  ~superblock() = default;

  /**
   * @brief Is this superblock empty (i.e. nothing is allocated from it)?
   *
   * Empty means the free list has collapsed back to a single block spanning the whole superblock.
   *
   * @return true if the superblock is fully free.
   */
  [[nodiscard]] bool empty() const
  {
    RMM_LOGGING_ASSERT(is_valid());
    return free_blocks_.size() == 1 && free_blocks_.cbegin()->size() == size();
  }

  /**
   * @brief Return the number of free blocks.
   *
   * @return The size of the free list.
   */
  [[nodiscard]] std::size_t free_blocks() const
  {
    RMM_LOGGING_ASSERT(is_valid());
    return free_blocks_.size();
  }

  /**
   * @brief Whether this superblock contains the given block (by address range).
   *
   * @param blk The block to check.
   * @return true if `blk` lies entirely within this superblock.
   */
  [[nodiscard]] bool contains(block const& blk) const
  {
    RMM_LOGGING_ASSERT(is_valid());
    RMM_LOGGING_ASSERT(blk.is_valid());
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    return pointer() <= blk.pointer() && pointer() + size() >= blk.pointer() + blk.size();
  }

  /**
   * @brief Can this superblock fit `bytes` bytes?
   *
   * @param bytes The requested allocation size.
   * @return true if any free block can fit `bytes` bytes.
   */
  [[nodiscard]] bool fits(std::size_t bytes) const
  {
    RMM_LOGGING_ASSERT(is_valid());
    return std::any_of(free_blocks_.cbegin(), free_blocks_.cend(), [bytes](auto const& blk) {
      return blk.fits(bytes);
    });
  }

  /**
   * @brief Verifies whether this superblock can be merged to the beginning of superblock `sblk`.
   *
   * Superblocks are only mergeable when both are completely free and adjacent in memory.
   *
   * @param sblk The superblock to check for contiguity.
   * @return true if both superblocks are empty and this one ends where `sblk` begins.
   */
  [[nodiscard]] bool is_contiguous_before(superblock const& sblk) const
  {
    RMM_LOGGING_ASSERT(is_valid());
    RMM_LOGGING_ASSERT(sblk.is_valid());
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    return empty() && sblk.empty() && pointer() + size() == sblk.pointer();
  }

  /**
   * @brief Split this superblock into two by the given size.
   *
   * Only valid on an empty superblock, and both resulting halves must be at least
   * `minimum_size` bytes (checked by assertion).
   *
   * @param bytes The size of the first resulting superblock.
   * @return A pair of superblocks.
   */
  [[nodiscard]] std::pair<superblock, superblock> split(std::size_t bytes) const
  {
    RMM_LOGGING_ASSERT(is_valid());
    RMM_LOGGING_ASSERT(empty() && bytes >= minimum_size && size() >= bytes + minimum_size);
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    return {superblock{pointer(), bytes}, superblock{pointer() + bytes, size() - bytes}};
  }

  /**
   * @brief Coalesce two contiguous superblocks into one.
   *
   * `this` must be contiguous before `sblk` (both empty, adjacent; checked by assertion).
   *
   * @param sblk The superblock to merge after this one.
   * @return A single superblock covering both inputs.
   */
  [[nodiscard]] superblock merge(superblock const& sblk) const
  {
    RMM_LOGGING_ASSERT(is_contiguous_before(sblk));
    return {pointer(), size() + sblk.size()};
  }

  /**
   * @brief Get the first free block of at least `size` bytes.
   *
   * Uses first-fit: takes the lowest-address free block that fits, splitting it if it is
   * larger than requested and returning the remainder to the free list.
   *
   * @param size The requested size in bytes.
   * @return A block of at least `size` bytes, or a default (invalid) block if none fits.
   */
  block first_fit(std::size_t size)
  {
    RMM_LOGGING_ASSERT(is_valid());
    RMM_LOGGING_ASSERT(size > 0);

    auto fits = [size](auto const& blk) { return blk.fits(size); };
    auto const iter = std::find_if(free_blocks_.cbegin(), free_blocks_.cend(), fits);
    if (iter == free_blocks_.cend()) { return {}; }

    // Remove the block from the free list.
    auto const blk = *iter;
    auto const next = free_blocks_.erase(iter);

    if (blk.size() > size) {
      // Split the block and put the remainder back.
      // `next` is reused as an insertion hint: the remainder sorts immediately before it.
      auto const split = blk.split(size);
      free_blocks_.insert(next, split.second);
      return split.first;
    }
    return blk;
  }

  /**
   * @brief Coalesce the given (freed) block with neighboring free blocks.
   *
   * Merges with the adjacent predecessor and/or successor in the address-ordered free list
   * when they are contiguous, so the free list always holds maximal free runs.
   *
   * @param blk The block being returned to this superblock.
   */
  void coalesce(block const& blk)  // NOLINT(readability-function-cognitive-complexity)
  {
    RMM_LOGGING_ASSERT(is_valid());
    RMM_LOGGING_ASSERT(blk.is_valid());
    RMM_LOGGING_ASSERT(contains(blk));

    // Find the right place (in ascending address order) to insert the block.
    auto const next = free_blocks_.lower_bound(blk);
    // When `next` is begin(), there is no predecessor; `previous == next` then fails the
    // contiguity check below because it sits at a higher address than `blk`.
    auto const previous = next == free_blocks_.cbegin() ? next : std::prev(next);

    // Coalesce with neighboring blocks.
    bool const merge_prev = previous != free_blocks_.cend() && previous->is_contiguous_before(blk);
    bool const merge_next = next != free_blocks_.cend() && blk.is_contiguous_before(*next);

    if (merge_prev && merge_next) {
      auto const merged = previous->merge(blk).merge(*next);
      free_blocks_.erase(previous);
      auto const iter = free_blocks_.erase(next);
      free_blocks_.insert(iter, merged);
    } else if (merge_prev) {
      auto const merged = previous->merge(blk);
      auto const iter = free_blocks_.erase(previous);
      free_blocks_.insert(iter, merged);
    } else if (merge_next) {
      auto const merged = blk.merge(*next);
      auto const iter = free_blocks_.erase(next);
      free_blocks_.insert(iter, merged);
    } else {
      free_blocks_.insert(next, blk);
    }
  }

  /**
   * @brief Find the total free block size.
   *
   * @return The sum of all free block sizes in bytes.
   */
  [[nodiscard]] std::size_t total_free_size() const { return total_memory_size(free_blocks_); }

  /**
   * @brief Find the max free block size.
   *
   * @return The size of the largest free block, or 0 if there are no free blocks.
   */
  [[nodiscard]] std::size_t max_free_size() const
  {
    if (free_blocks_.empty()) { return 0; }
    return std::max_element(free_blocks_.cbegin(), free_blocks_.cend(), block_size_compare)->size();
  }

 private:
  /// Address-ordered set of free blocks within this superblock.
  std::set<block> free_blocks_{};
};
466 
468 inline auto total_free_size(std::set<superblock> const& superblocks)
469 {
470  return std::accumulate(
471  superblocks.cbegin(), superblocks.cend(), std::size_t{}, [](auto const& lhs, auto const& rhs) {
472  return lhs + rhs.total_free_size();
473  });
474 }
475 
477 inline auto max_free_size(std::set<superblock> const& superblocks)
478 {
479  std::size_t size{};
480  for (auto const& sblk : superblocks) {
481  size = std::max(size, sblk.max_free_size());
482  }
483  return size;
484 };
485 
494 template <typename Upstream>
495 class global_arena final {
496  public:
506  global_arena(Upstream* upstream_mr, std::optional<std::size_t> arena_size)
507  : upstream_mr_{upstream_mr}
508  {
509  RMM_EXPECTS(nullptr != upstream_mr_, "Unexpected null upstream pointer.");
510  auto const size = rmm::detail::align_down(arena_size.value_or(default_size()),
511  rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
512  RMM_EXPECTS(size >= superblock::minimum_size,
513  "Arena size smaller than minimum superblock size.");
514  initialize(size);
515  }
516 
517  // Disable copy (and move) semantics.
518  global_arena(global_arena const&) = delete;
519  global_arena& operator=(global_arena const&) = delete;
520  global_arena(global_arena&&) noexcept = delete;
521  global_arena& operator=(global_arena&&) noexcept = delete;
522 
528  {
529  std::lock_guard lock(mtx_);
530  upstream_mr_->deallocate(upstream_block_.pointer(), upstream_block_.size());
531  }
532 
539  bool handles(std::size_t size) const { return size > superblock::minimum_size; }
540 
547  superblock acquire(std::size_t size)
548  {
549  // Superblocks should only be acquired if the size is not directly handled by the global arena.
550  RMM_LOGGING_ASSERT(!handles(size));
551  std::lock_guard lock(mtx_);
552  return first_fit(size);
553  }
554 
560  void release(superblock&& sblk)
561  {
562  RMM_LOGGING_ASSERT(sblk.is_valid());
563  std::lock_guard lock(mtx_);
564  coalesce(std::move(sblk));
565  }
566 
572  void release(std::set<superblock>& superblocks)
573  {
574  std::lock_guard lock(mtx_);
575  while (!superblocks.empty()) {
576  auto sblk = std::move(superblocks.extract(superblocks.cbegin()).value());
577  RMM_LOGGING_ASSERT(sblk.is_valid());
578  coalesce(std::move(sblk));
579  }
580  }
581 
588  void* allocate(std::size_t size)
589  {
590  RMM_LOGGING_ASSERT(handles(size));
591  std::lock_guard lock(mtx_);
592  auto sblk = first_fit(size);
593  if (sblk.is_valid()) {
594  auto blk = sblk.first_fit(size);
595  superblocks_.insert(std::move(sblk));
596  return blk.pointer();
597  }
598  return nullptr;
599  }
600 
610  bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream)
611  {
612  RMM_LOGGING_ASSERT(handles(size));
613  stream.synchronize_no_throw();
614  return deallocate(ptr, size);
615  }
616 
625  bool deallocate(void* ptr, std::size_t bytes)
626  {
627  std::lock_guard lock(mtx_);
628 
629  block const blk{ptr, bytes};
630  auto const iter = std::find_if(superblocks_.cbegin(),
631  superblocks_.cend(),
632  [&](auto const& sblk) { return sblk.contains(blk); });
633  if (iter == superblocks_.cend()) { return false; }
634 
635  auto sblk = std::move(superblocks_.extract(iter).value());
636  sblk.coalesce(blk);
637  if (sblk.empty()) {
638  coalesce(std::move(sblk));
639  } else {
640  superblocks_.insert(std::move(sblk));
641  }
642  return true;
643  }
644 
650  void dump_memory_log(std::shared_ptr<spdlog::logger> const& logger) const
651  {
652  std::lock_guard lock(mtx_);
653 
654  logger->info(" Arena size: {}", rmm::detail::bytes{upstream_block_.size()});
655  logger->info(" # superblocks: {}", superblocks_.size());
656  if (!superblocks_.empty()) {
657  logger->debug(" Total size of superblocks: {}",
658  rmm::detail::bytes{total_memory_size(superblocks_)});
659  auto const total_free = total_free_size(superblocks_);
660  auto const max_free = max_free_size(superblocks_);
661  auto const fragmentation = (1 - max_free / static_cast<double>(total_free)) * 100;
662  logger->info(" Total free memory: {}", rmm::detail::bytes{total_free});
663  logger->info(" Largest block of free memory: {}", rmm::detail::bytes{max_free});
664  logger->info(" Fragmentation: {:.2f}%", fragmentation);
665 
666  auto index = 0;
667  char* prev_end{};
668  for (auto const& sblk : superblocks_) {
669  if (prev_end == nullptr) { prev_end = sblk.pointer(); }
670  logger->debug(
671  " Superblock {}: start={}, end={}, size={}, empty={}, # free blocks={}, max free={}, "
672  "gap={}",
673  index,
674  fmt::ptr(sblk.pointer()),
675  fmt::ptr(sblk.end()),
676  rmm::detail::bytes{sblk.size()},
677  sblk.empty(),
678  sblk.free_blocks(),
679  rmm::detail::bytes{sblk.max_free_size()},
680  rmm::detail::bytes{static_cast<size_t>(sblk.pointer() - prev_end)});
681  prev_end = sblk.end();
682  index++;
683  }
684  }
685  }
686 
687  private:
692  constexpr std::size_t default_size() const
693  {
694  auto const [free, total] = rmm::detail::available_device_memory();
695  return free / 2;
696  }
697 
703  void initialize(std::size_t size)
704  {
705  upstream_block_ = {upstream_mr_->allocate(size), size};
706  superblocks_.emplace(upstream_block_.pointer(), size);
707  }
708 
723  superblock first_fit(std::size_t size)
724  {
725  auto const iter = std::find_if(superblocks_.cbegin(),
726  superblocks_.cend(),
727  [=](auto const& sblk) { return sblk.fits(size); });
728  if (iter == superblocks_.cend()) { return {}; }
729 
730  auto sblk = std::move(superblocks_.extract(iter).value());
731  auto const min_size = std::max(superblock::minimum_size, size);
732  if (sblk.empty() && sblk.size() >= min_size + superblock::minimum_size) {
733  // Split the superblock and put the remainder back.
734  auto [head, tail] = sblk.split(min_size);
735  superblocks_.insert(std::move(tail));
736  return std::move(head);
737  }
738  return sblk;
739  }
740 
746  void coalesce(superblock&& sblk)
747  {
748  RMM_LOGGING_ASSERT(sblk.is_valid());
749 
750  // Find the right place (in ascending address order) to insert the block.
751  auto const next = superblocks_.lower_bound(sblk);
752  auto const previous = next == superblocks_.cbegin() ? next : std::prev(next);
753 
754  // Coalesce with neighboring blocks.
755  bool const merge_prev = previous != superblocks_.cend() && previous->is_contiguous_before(sblk);
756  bool const merge_next = next != superblocks_.cend() && sblk.is_contiguous_before(*next);
757 
758  if (merge_prev && merge_next) {
759  auto prev_sb = std::move(superblocks_.extract(previous).value());
760  auto next_sb = std::move(superblocks_.extract(next).value());
761  auto merged = prev_sb.merge(sblk).merge(next_sb);
762  superblocks_.insert(std::move(merged));
763  } else if (merge_prev) {
764  auto prev_sb = std::move(superblocks_.extract(previous).value());
765  auto merged = prev_sb.merge(sblk);
766  superblocks_.insert(std::move(merged));
767  } else if (merge_next) {
768  auto next_sb = std::move(superblocks_.extract(next).value());
769  auto merged = sblk.merge(next_sb);
770  superblocks_.insert(std::move(merged));
771  } else {
772  superblocks_.insert(std::move(sblk));
773  }
774  }
775 
777  Upstream* upstream_mr_;
779  block upstream_block_;
781  std::set<superblock> superblocks_;
783  mutable std::mutex mtx_;
784 };
785 
/**
 * @brief An arena for allocating memory for a thread.
 *
 * Serves small allocations from superblocks it holds, acquiring superblocks from (and
 * releasing them back to) the shared global arena. Allocations above the superblock
 * minimum are forwarded to the global arena directly. Member functions lock `mtx_`.
 *
 * @tparam Upstream Memory resource used by the global arena.
 */
template <typename Upstream>
class arena {
 public:
  /**
   * @brief Construct an arena backed by the given global arena.
   *
   * @param global_arena The global arena from which to allocate superblocks.
   */
  explicit arena(global_arena<Upstream>& global_arena) : global_arena_{global_arena} {}

  // Disable copy (and move) semantics.
  arena(arena const&) = delete;
  arena& operator=(arena const&) = delete;
  arena(arena&&) noexcept = delete;
  arena& operator=(arena&&) noexcept = delete;

  ~arena() = default;

  /**
   * @brief Allocates memory of size at least `size` bytes.
   *
   * Large allocations bypass this arena and go straight to the global arena.
   *
   * @param size The size in bytes of the allocation.
   * @return Pointer to the allocated memory, or nullptr if no block could be obtained.
   */
  void* allocate(std::size_t size)
  {
    if (global_arena_.handles(size)) { return global_arena_.allocate(size); }
    std::lock_guard lock(mtx_);
    return get_block(size).pointer();
  }

  /**
   * @brief Deallocate memory pointed to by `ptr`.
   *
   * Large allocations are first tried against the global arena (which synchronizes `stream`);
   * anything else (or a miss there) is returned to this arena's superblocks.
   *
   * @param ptr Pointer to be deallocated.
   * @param size The size in bytes of the allocation.
   * @param stream Stream on which the allocation was last used.
   * @return true if the allocation was found and freed.
   */
  bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream)
  {
    if (global_arena_.handles(size) && global_arena_.deallocate(ptr, size, stream)) { return true; }
    return deallocate(ptr, size);
  }

  /**
   * @brief Deallocate memory pointed to by `ptr` into this arena's superblocks.
   *
   * @param ptr Pointer to be deallocated.
   * @param size The size in bytes of the allocation.
   * @return true if a containing superblock was found, false otherwise.
   */
  bool deallocate(void* ptr, std::size_t size)
  {
    std::lock_guard lock(mtx_);
    return deallocate_from_superblock({ptr, size});
  }

  /**
   * @brief Clean the arena, releasing all superblocks back to the global arena.
   */
  void clean()
  {
    std::lock_guard lock(mtx_);
    global_arena_.release(superblocks_);
    superblocks_.clear();
  }

  /**
   * @brief Defragment the arena: release every empty superblock to the global arena.
   */
  void defragment()
  {
    std::lock_guard lock(mtx_);
    while (true) {
      auto const iter = std::find_if(
        superblocks_.cbegin(), superblocks_.cend(), [](auto const& sblk) { return sblk.empty(); });
      if (iter == superblocks_.cend()) { return; }
      global_arena_.release(std::move(superblocks_.extract(iter).value()));
    }
  }

 private:
  /**
   * @brief Get an available memory block of at least `size` bytes.
   *
   * Tries the held superblocks first, then grows the arena from the global arena.
   * Caller must hold `mtx_`. The returned block is invalid (null pointer) on failure.
   *
   * @param size The allocation size in bytes.
   * @return A block of at least `size` bytes, possibly invalid.
   */
  block get_block(std::size_t size)
  {
    // Find the first-fit free block.
    auto const blk = first_fit(size);
    if (blk.is_valid()) { return blk; }

    // No existing larger blocks available, so grow the arena and obtain a superblock.
    return expand_arena(size);
  }

  /**
   * @brief Get the first block of at least `size` bytes from the held superblocks.
   *
   * The containing superblock is extracted from the set, mutated, and re-inserted because
   * std::set elements are const in place. Caller must hold `mtx_`.
   *
   * @param size The allocation size in bytes.
   * @return A fitting block, or an invalid block if no superblock fits.
   */
  block first_fit(std::size_t size)
  {
    auto const iter = std::find_if(superblocks_.cbegin(),
                                   superblocks_.cend(),
                                   [size](auto const& sblk) { return sblk.fits(size); });
    if (iter == superblocks_.cend()) { return {}; }

    auto sblk      = std::move(superblocks_.extract(iter).value());
    auto const blk = sblk.first_fit(size);
    superblocks_.insert(std::move(sblk));
    return blk;
  }

  /**
   * @brief Return the given block to its containing superblock.
   *
   * Caller must hold `mtx_`.
   *
   * @param blk The block being freed.
   * @return true if a containing superblock was found, false otherwise.
   */
  bool deallocate_from_superblock(block const& blk)
  {
    auto const iter = std::find_if(superblocks_.cbegin(),
                                   superblocks_.cend(),
                                   [&](auto const& sblk) { return sblk.contains(blk); });
    if (iter == superblocks_.cend()) { return false; }

    auto sblk = std::move(superblocks_.extract(iter).value());
    sblk.coalesce(blk);
    superblocks_.insert(std::move(sblk));
    return true;
  }

  /**
   * @brief Acquire a new superblock from the global arena and carve a block from it.
   *
   * Caller must hold `mtx_`.
   *
   * @param size The allocation size in bytes.
   * @return A block of at least `size` bytes, or an invalid block if acquisition failed.
   */
  block expand_arena(std::size_t size)
  {
    auto sblk = global_arena_.acquire(size);
    if (sblk.is_valid()) {
      RMM_LOGGING_ASSERT(sblk.size() >= superblock::minimum_size);
      auto const blk = sblk.first_fit(size);
      superblocks_.insert(std::move(sblk));
      return blk;
    }
    return {};
  }

  /// The global arena to allocate superblocks from.
  global_arena<Upstream>& global_arena_;
  /// Address-ordered set of superblocks this arena currently holds.
  std::set<superblock> superblocks_;
  /// Mutex for exclusive lock.
  mutable std::mutex mtx_;
};
967 
976 template <typename Upstream>
978  public:
979  explicit arena_cleaner(std::shared_ptr<arena<Upstream>> const& arena) : arena_(arena) {}
980 
981  // Disable copy (and move) semantics.
982  arena_cleaner(arena_cleaner const&) = delete;
983  arena_cleaner& operator=(arena_cleaner const&) = delete;
984  arena_cleaner(arena_cleaner&&) noexcept = delete;
985  arena_cleaner& operator=(arena_cleaner&&) = delete;
986 
987  ~arena_cleaner()
988  {
989  if (!arena_.expired()) {
990  auto arena_ptr = arena_.lock();
991  arena_ptr->clean();
992  }
993  }
994 
995  private:
997  std::weak_ptr<arena<Upstream>> arena_;
998 };
999 
1000 } // namespace rmm::mr::detail::arena
// NOTE(review): removed trailing Doxygen cross-reference residue (hover-text for symbols
// already defined above) that was appended to the file by the HTML extraction and is not
// valid C++.