9 #include <rmm/detail/error.hpp>
10 #include <rmm/detail/export.hpp>
11 #include <rmm/detail/format.hpp>
12 #include <rmm/detail/logging_assert.hpp>
13 #include <rmm/detail/thrust_namespace.h>
14 #include <rmm/logger.hpp>
15 #include <rmm/mr/device/detail/coalescing_free_list.hpp>
16 #include <rmm/mr/device/detail/stream_ordered_memory_resource.hpp>
21 #include <cuda/std/type_traits>
22 #include <cuda_runtime_api.h>
23 #include <thrust/iterator/counting_iterator.h>
24 #include <thrust/iterator/transform_iterator.h>
32 namespace RMM_NAMESPACE {
51 template <
class PoolResource,
class Upstream,
class Property,
class =
void>
57 template <
class PoolResource,
class Upstream,
class Property>
61 cuda::std::enable_if_t<!cuda::has_property<Upstream, Property>>> {
62 #if defined(__GNUC__) && !defined(__clang__)
64 #pragma GCC diagnostic push
65 #pragma GCC diagnostic ignored "-Wnon-template-friend"
71 friend void get_property(const PoolResource&, Property) = delete;
72 #if defined(__GNUC__) && !defined(__clang__)
73 #pragma GCC diagnostic pop
88 template <
typename Upstream>
91 maybe_remove_property<pool_memory_resource<Upstream>, Upstream, cuda::mr::device_accessible>,
92 public detail::stream_ordered_memory_resource<pool_memory_resource<Upstream>,
93 detail::coalescing_free_list>,
94 public cuda::forward_property<pool_memory_resource<Upstream>, Upstream> {
97 detail::coalescing_free_list>;
114 std::size_t initial_pool_size,
115 std::optional<std::size_t> maximum_pool_size = std::nullopt)
116 : upstream_mr_{upstream_mr}
119 "Error, Initial pool size required to be a multiple of 256 bytes");
121 "Error, Maximum pool size required to be a multiple of 256 bytes");
123 initialize_pool(initial_pool_size, maximum_pool_size);
142 std::size_t initial_pool_size,
143 std::optional<std::size_t> maximum_pool_size = std::nullopt)
147 "Error, Initial pool size required to be a multiple of 256 bytes");
149 "Error, Maximum pool size required to be a multiple of 256 bytes");
151 initialize_pool(initial_pool_size, maximum_pool_size);
169 template <
typename Upstream2 = Upstream>
171 std::size_t initial_pool_size,
172 std::optional<std::size_t> maximum_pool_size = std::nullopt)
204 [[nodiscard]] std::size_t
pool_size() const noexcept {
// Report the running byte total kept in current_pool_size_; elsewhere in this
// listing that counter is grown by block.size() on expansion and reset to 0.
return current_pool_size_; }
209 using typename detail::stream_ordered_memory_resource<pool_memory_resource<Upstream>,
210 detail::coalescing_free_list>::split_block;
223 return std::numeric_limits<std::size_t>::max();
243 auto report_error = [&](
const char* reason) {
244 RMM_LOG_ERROR(
"[A][Stream %s][Upstream %zuB][FAILURE maximum pool size exceeded: %s]",
245 rmm::detail::format_stream(stream),
248 auto const msg = std::string(
"Maximum pool size exceeded (failed to allocate ") +
249 rmm::detail::format_bytes(min_size) + std::string(
"): ") + reason;
253 while (try_size >= min_size) {
255 auto block = block_from_upstream(try_size, stream);
256 current_pool_size_ += block.size();
258 }
catch (std::exception
const& e) {
259 if (try_size == min_size) { report_error(e.what()); }
261 try_size = std::max(min_size, try_size / 2);
264 auto const max_size = maximum_pool_size_.value_or(std::numeric_limits<std::size_t>::max());
265 auto const msg = std::string(
"Not enough room to grow, current/max/try size = ") +
266 rmm::detail::format_bytes(pool_size()) +
", " +
267 rmm::detail::format_bytes(max_size) +
", " +
268 rmm::detail::format_bytes(min_size);
269 report_error(msg.c_str());
281 void initialize_pool(std::size_t initial_size, std::optional<std::size_t> maximum_size)
283 current_pool_size_ = 0;
284 maximum_pool_size_ = maximum_size;
287 initial_size <= maximum_pool_size_.value_or(std::numeric_limits<std::size_t>::max()),
288 "Initial pool size exceeds the maximum pool size!");
290 if (initial_size > 0) {
312 return try_to_expand(size_to_grow(size), size, stream);
329 if (maximum_pool_size_.has_value()) {
330 auto const unaligned_remaining = maximum_pool_size_.value() - pool_size();
333 return (aligned_size <= remaining) ? std::max(aligned_size, remaining / 2) : 0;
335 return std::max(size, pool_size());
348 RMM_LOG_DEBUG(
"[A][Stream %s][Upstream %zuB]", rmm::detail::format_stream(stream), size);
350 if (size == 0) {
return {}; }
352 void* ptr = get_upstream_resource().allocate(stream, size);
353 return *upstream_blocks_.emplace(
static_cast<char*
>(ptr), size,
true).first;
368 block_type const alloc{block.pointer(), size, block.is_head()};
369 #ifdef RMM_POOL_TRACK_ALLOCATIONS
370 allocated_blocks_.insert(alloc);
373 auto rest = (block.size() > size)
375 ?
block_type{block.pointer() + size, block.size() - size,
false}
377 return {alloc, rest};
390 #ifdef RMM_POOL_TRACK_ALLOCATIONS
392 auto const iter = allocated_blocks_.find(
static_cast<char*
>(ptr));
393 RMM_LOGGING_ASSERT(iter != allocated_blocks_.end());
396 RMM_LOGGING_ASSERT(block.size() ==
rmm::align_up(size, allocation_alignment));
397 allocated_blocks_.erase(iter);
401 auto const iter = upstream_blocks_.find(
static_cast<char*
>(ptr));
402 return block_type{
static_cast<char*
>(ptr), size, (iter != upstream_blocks_.end())};
414 for (
auto block : upstream_blocks_) {
415 get_upstream_resource().deallocate_sync(block.pointer(), block.size());
417 upstream_blocks_.clear();
418 #ifdef RMM_POOL_TRACK_ALLOCATIONS
419 allocated_blocks_.clear();
422 current_pool_size_ = 0;
425 #ifdef RMM_DEBUG_PRINT
434 lock_guard lock(this->get_mutex());
437 std::cout <<
"GPU free memory: " << free <<
" total: " << total <<
"\n";
439 std::cout <<
"upstream_blocks: " << upstream_blocks_.size() <<
"\n";
440 std::size_t upstream_total{0};
442 for (
auto blocks : upstream_blocks_) {
444 upstream_total += blocks.size();
446 std::cout <<
"total upstream: " << upstream_total <<
" B\n";
448 #ifdef RMM_POOL_TRACK_ALLOCATIONS
449 std::cout <<
"allocated_blocks: " << allocated_blocks_.size() <<
"\n";
450 for (
auto block : allocated_blocks_)
454 this->print_free_blocks();
468 std::size_t largest{};
470 std::for_each(blocks.cbegin(), blocks.cend(), [&largest, &total](
auto const& block) {
471 total += block.size();
472 largest = std::max(largest, block.size());
474 return {largest, total};
480 std::size_t current_pool_size_{};
481 std::optional<std::size_t> maximum_pool_size_{};
483 #ifdef RMM_POOL_TRACK_ALLOCATIONS
484 std::set<block_type, rmm::mr::detail::compare_blocks<block_type>> allocated_blocks_;
488 std::set<block_type, rmm::mr::detail::compare_blocks<block_type>> upstream_blocks_;
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:28
A coalescing best-fit suballocator which uses a pool of memory allocated from an upstream memory_resource.
Definition: pool_memory_resource.hpp:94
pool_memory_resource(Upstream2 &upstream_mr, std::size_t initial_pool_size, std::optional< std::size_t > maximum_pool_size=std::nullopt)
Construct a pool_memory_resource and allocate the initial device memory pool using upstream_mr.
Definition: pool_memory_resource.hpp:170
block_type free_block(void *ptr, std::size_t size) noexcept
Finds, frees and returns the block associated with pointer ptr.
Definition: pool_memory_resource.hpp:388
void initialize_pool(std::size_t initial_size, std::optional< std::size_t > maximum_size)
Allocate initial memory for the pool.
Definition: pool_memory_resource.hpp:281
split_block allocate_from_block(block_type const &block, std::size_t size)
Splits block if necessary to return a pointer to memory of size bytes.
Definition: pool_memory_resource.hpp:366
device_async_resource_ref get_upstream_resource() const noexcept
Returns an rmm::device_async_resource_ref to the upstream resource.
Definition: pool_memory_resource.hpp:192
std::size_t size_to_grow(std::size_t size) const
Given a minimum size, computes an appropriate size to grow the pool.
Definition: pool_memory_resource.hpp:327
free_list::block_type block_type
The type of block returned by the free list.
Definition: pool_memory_resource.hpp:208
std::pair< std::size_t, std::size_t > free_list_summary(free_list const &blocks)
Get the largest available block size and total free size in the specified free list.
Definition: pool_memory_resource.hpp:466
std::size_t get_maximum_allocation_size() const
Get the maximum size of allocations supported by this memory resource.
Definition: pool_memory_resource.hpp:221
block_type expand_pool(std::size_t size, free_list &blocks, cuda_stream_view stream)
Allocate space from upstream to supply the suballocation pool and return a sufficiently sized block.
Definition: pool_memory_resource.hpp:305
void release()
Free all memory allocated from the upstream memory_resource.
Definition: pool_memory_resource.hpp:410
block_type try_to_expand(std::size_t try_size, std::size_t min_size, cuda_stream_view stream)
Try to expand the pool by allocating a block of at least min_size bytes from upstream.
Definition: pool_memory_resource.hpp:241
std::lock_guard< std::mutex > lock_guard
Type of lock used to synchronize access.
Definition: pool_memory_resource.hpp:211
std::size_t pool_size() const noexcept
Computes the size of the current pool.
Definition: pool_memory_resource.hpp:204
~pool_memory_resource() override
Destroy the pool_memory_resource and deallocate all memory it allocated using the upstream resource.
Definition: pool_memory_resource.hpp:181
detail::coalescing_free_list free_list
The free list implementation.
Definition: pool_memory_resource.hpp:207
pool_memory_resource(Upstream *upstream_mr, std::size_t initial_pool_size, std::optional< std::size_t > maximum_pool_size=std::nullopt)
Construct a pool_memory_resource and allocate the initial device memory pool using upstream_mr.
Definition: pool_memory_resource.hpp:141
block_type block_from_upstream(std::size_t size, cuda_stream_view stream)
Allocate a block from upstream to expand the suballocation pool.
Definition: pool_memory_resource.hpp:346
Exception thrown when RMM runs out of memory.
Definition: error.hpp:76
std::pair< std::size_t, std::size_t > available_device_memory()
Returns the available and total device memory in bytes for the current device.
static const cuda_stream_view cuda_stream_legacy
Static cuda_stream_view of cudaStreamLegacy, for convenience.
Definition: cuda_stream_view.hpp:116
device_async_resource_ref to_device_async_resource_ref_checked(Resource *res)
Convert a pointer to a memory resource into a device_async_resource_ref, checking for nullptr.
Definition: resource_ref.hpp:72
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:32
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
Default alignment used for CUDA memory allocation.
Definition: aligned.hpp:31
bool is_aligned(std::size_t value, std::size_t alignment) noexcept
Checks whether a value is aligned to a multiple of a specified power of 2.
std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
Align up to nearest multiple of specified power of 2.
Management of per-device device_memory_resources.
A helper class to remove the device_accessible property.
Definition: pool_memory_resource.hpp:52