#include <rmm/aligned.hpp>
#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/detail/export.hpp>
#include <rmm/detail/logging_assert.hpp>
#include <rmm/detail/thrust_namespace.h>
#include <rmm/mr/device/detail/fixed_size_free_list.hpp>
#include <rmm/mr/device/detail/stream_ordered_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <cuda_runtime_api.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>

#include <cstddef>
#include <mutex>
#include <utility>
#include <vector>
namespace RMM_NAMESPACE {
namespace mr {
/**
 * @brief A device_memory_resource which allocates memory blocks of a single fixed size.
 */
template <typename Upstream>
class fixed_size_memory_resource
  : public detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
                                                  detail::fixed_size_free_list> {
 public:
  friend class detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
                                                      detail::fixed_size_free_list>;
  static constexpr std::size_t default_block_size            = 1 << 20;
  static constexpr std::size_t default_blocks_to_preallocate = 128;
  explicit fixed_size_memory_resource(
    device_async_resource_ref upstream_mr,
    std::size_t block_size            = default_block_size,
    std::size_t blocks_to_preallocate = default_blocks_to_preallocate)
    : upstream_mr_{upstream_mr},
      block_size_{align_up(block_size, CUDA_ALLOCATION_ALIGNMENT)},
      upstream_chunk_size_{block_size_ * blocks_to_preallocate}
  {
    this->insert_blocks(std::move(blocks_from_upstream(cuda_stream_legacy)), cuda_stream_legacy);
  }
  explicit fixed_size_memory_resource(
    Upstream* upstream_mr,
    std::size_t block_size            = default_block_size,
    std::size_t blocks_to_preallocate = default_blocks_to_preallocate)
    : upstream_mr_{to_device_async_resource_ref_checked(upstream_mr)},
      block_size_{align_up(block_size, CUDA_ALLOCATION_ALIGNMENT)},
      upstream_chunk_size_{block_size_ * blocks_to_preallocate}
  {
    this->insert_blocks(std::move(blocks_from_upstream(cuda_stream_legacy)), cuda_stream_legacy);
  }
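  // Usage sketch (illustrative, not part of this header): constructing the resource on top
  // of a cuda_memory_resource with the default 1 MiB block size and 128 preallocated blocks.
  // Every allocation of at most block_size bytes consumes one full block.
  //
  //   rmm::mr::cuda_memory_resource upstream;
  //   rmm::mr::fixed_size_memory_resource<rmm::mr::cuda_memory_resource> mr{&upstream};
  //   void* ptr = mr.allocate(256);  // hands out a full 1 MiB block
  //   mr.deallocate(ptr, 256);       // block goes back to the free list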
  // ... (other public members elided)

  [[nodiscard]] std::size_t get_block_size() const noexcept { return block_size_; }
 protected:
  using free_list  = detail::fixed_size_free_list;  ///< The free list type
  using block_type = free_list::block_type;         ///< The type of block managed by the free list
  using typename detail::stream_ordered_memory_resource<fixed_size_memory_resource<Upstream>,
                                                         detail::fixed_size_free_list>::split_block;
  using lock_guard = std::lock_guard<std::mutex>;  ///< Type of lock used to synchronize access
  /**
   * @brief Allocate a block from upstream to supply the suballocation pool.
   */
  block_type expand_pool(std::size_t size, free_list& blocks, cuda_stream_view stream)
  {
    blocks.insert(std::move(blocks_from_upstream(stream)));
    return blocks.get_block(size);
  }
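  // Note (assumed behavior of the CRTP base, see stream_ordered_memory_resource.hpp):
  // expand_pool() is invoked when the free list for the requested stream cannot satisfy
  // an allocation, so the pool grows by one upstream chunk at a time.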
  /**
   * @brief Allocate blocks from upstream to expand the suballocation pool.
   */
  free_list blocks_from_upstream(cuda_stream_view stream)
  {
    void* ptr = get_upstream_resource().allocate_async(upstream_chunk_size_, stream);
    block_type block{ptr};
    upstream_blocks_.push_back(block);

    auto num_blocks = upstream_chunk_size_ / block_size_;

    auto block_gen = [ptr, this](int index) {
      return block_type{static_cast<char*>(ptr) + index * block_size_};
    };
    auto const first =
      thrust::make_transform_iterator(thrust::make_counting_iterator(std::size_t{0}), block_gen);
    return free_list(first, first + num_blocks);
  }
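  // Illustrative arithmetic: with the defaults above (1 MiB blocks, 128 blocks per chunk),
  // each call requests a single 128 MiB chunk from upstream and carves it into
  // num_blocks = upstream_chunk_size_ / block_size_ = (1 << 27) / (1 << 20) = 128 blocks.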
  // ... (allocate_from_block() and free_block() elided)

  /**
   * @brief Free all memory allocated using the upstream resource.
   */
  void release()
  {
    for (auto block : upstream_blocks_) {
      get_upstream_resource().deallocate(block.pointer(), upstream_chunk_size_);
    }
    upstream_blocks_.clear();
  }
#ifdef RMM_DEBUG_PRINT
  void print()
  {
    lock_guard lock(this->get_mutex());

    auto const [free, total] = rmm::available_device_memory();
    std::cout << "GPU free memory: " << free << " total: " << total << "\n";
    std::cout << "upstream_blocks: " << upstream_blocks_.size() << "\n";

    std::size_t upstream_total{0};
    for (auto blocks : upstream_blocks_) {
      upstream_total += upstream_chunk_size_;
    }
    std::cout << "total upstream: " << upstream_total << " B\n";

    this->print_free_blocks();
  }
#endif
  /**
   * @brief Get the largest available block size and total free size in the specified free list.
   */
  std::pair<std::size_t, std::size_t> free_list_summary(free_list const& blocks)
  {
    return blocks.is_empty() ? std::make_pair(std::size_t{0}, std::size_t{0})
                             : std::make_pair(block_size_, blocks.size() * block_size_);
  }
 private:
  device_async_resource_ref upstream_mr_;

  std::size_t block_size_;
  std::size_t upstream_chunk_size_;

  std::vector<block_type> upstream_blocks_;
};

}  // namespace mr
}  // namespace RMM_NAMESPACE
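// ---------------------------------------------------------------------------
// Usage sketch (illustrative; not part of fixed_size_memory_resource.hpp).
// Shows stream-ordered allocation through the resource. Assumes a CUDA-capable
// device and the RMM headers named below; error handling is omitted.
// ---------------------------------------------------------------------------
#include <rmm/cuda_stream.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/fixed_size_memory_resource.hpp>

int main()
{
  rmm::mr::cuda_memory_resource upstream;
  rmm::mr::fixed_size_memory_resource<rmm::mr::cuda_memory_resource> mr{&upstream};

  rmm::cuda_stream stream;
  void* ptr = mr.allocate(4096, stream);  // stream-ordered; still returns one full 1 MiB block
  mr.deallocate(ptr, 4096, stream);       // block returns to the stream's free list

  return 0;
}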