bounce_buffer.hpp
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
7 #include <stack>
8 
9 #include <kvikio/defaults.hpp>
10 
11 namespace kvikio {
12 
21  public:
28  void* allocate(std::size_t size);
29 
36  void deallocate(void* buffer, std::size_t size);
37 };
38 
49  public:
56  void* allocate(std::size_t size);
57 
64  void deallocate(void* buffer, std::size_t size);
65 };
66 
78  public:
85  void* allocate(std::size_t size);
86 
93  void deallocate(void* buffer, std::size_t size);
94 };
95 
117 template <typename Allocator = CudaPinnedAllocator>
119  private:
120  std::mutex _mutex{};
121  // Stack of free allocations (LIFO for cache locality)
122  std::stack<void*> _free_buffers{};
123  // The size of each allocation in `_free_buffers`
124  std::size_t _buffer_size{defaults::bounce_buffer_size()};
125  Allocator _allocator{};
126 
127  public:
// Handle to one buffer associated with a `BounceBufferPool`. It stores the pool pointer, the raw
// buffer pointer and the buffer size that are passed to its constructor.
// NOTE(review): the tooltip text of this listing describes it as an "RAII wrapper for a host
// bounce buffer allocation"; the destructor presumably hands the buffer back to the pool (see
// `put()`), but its definition is not visible here -- confirm.
136  class Buffer {
137  private:
// Pool this handle was created with.
138  BounceBufferPool* _pool;
// Raw pointer to the underlying allocation.
139  void* _buffer;
// Size of the allocation; `const`, so it cannot change after construction.
140  std::size_t const _size;
141 
142  public:
// Construct a handle from a pool pointer, a raw buffer pointer and the buffer size.
143  Buffer(BounceBufferPool<Allocator>* pool, void* buffer, std::size_t size);
// Copying and moving are both disabled, so a handle cannot be duplicated or transferred.
144  Buffer(Buffer const&) = delete;
145  Buffer& operator=(Buffer const&) = delete;
146  Buffer(Buffer&& o) = delete;
147  Buffer& operator=(Buffer&& o) = delete;
// Declared `noexcept`: destruction must not throw.
148  ~Buffer() noexcept;
// Pointer accessor; presumably returns the start of the buffer -- definition not visible here.
149  void* get() noexcept;
// Pointer accessor taking an offset; presumably returns the buffer pointer advanced by `offset`
// (unit assumed to be bytes -- TODO confirm against the definition).
150  void* get(std::ptrdiff_t offset) noexcept;
// Size accessor; presumably returns `_size` -- definition not visible here.
151  std::size_t size() noexcept;
152  };
153 
154  BounceBufferPool() = default;
155 
156  // Note: we do not clear the allocations at destruction, so the allocations leak
157  // at exit. We do this because `BounceBufferPool::instance()` stores the allocations in a
158  // static stack that is destructed after `main` returns, which is not allowed in CUDA:
159  // <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#initialization>
160  ~BounceBufferPool() noexcept = default;
161 
162  private:
170  std::size_t _clear();
171 
180  void _ensure_buffer_size();
181 
182  public:
// Acquire a bounce buffer from the pool. The buffer is returned by value as a `Buffer` handle;
// the declaration is `[[nodiscard]]`, so discarding the result triggers a compiler diagnostic.
192  [[nodiscard]] Buffer get();
193 
// Return a buffer to the pool for reuse. Takes the raw buffer pointer and its size.
// NOTE(review): how `size` is compared against the pool's current `_buffer_size` is decided in
// the definition, which is not visible here.
203  void put(void* buffer, std::size_t size);
204 
// Free all retained allocations in the pool.
// NOTE(review): the meaning of the returned `std::size_t` (e.g. number of buffers or bytes freed)
// is not visible in this listing -- confirm against the definition.
213  std::size_t clear();
214 
// Get the singleton instance of the pool, returned by reference. The copy and move operations of
// `BounceBufferPool` are deleted, so callers must hold the result as a reference.
222  KVIKIO_EXPORT static BounceBufferPool& instance();
223 
224  BounceBufferPool(BounceBufferPool const&) = delete;
225  BounceBufferPool& operator=(BounceBufferPool const&) = delete;
226  BounceBufferPool(BounceBufferPool&& o) = delete;
227  BounceBufferPool& operator=(BounceBufferPool&& o) = delete;
228 };
229 
236 
244 
252 } // namespace kvikio
RAII wrapper for a host bounce buffer allocation.
Thread-safe singleton pool for reusable bounce buffers.
void put(void *buffer, std::size_t size)
Return a buffer to the pool for reuse.
std::size_t clear()
Free all retained allocations in the pool.
static KVIKIO_EXPORT BounceBufferPool & instance()
Get the singleton instance of the pool.
Buffer get()
Acquire a bounce buffer from the pool.
Allocator for page-aligned AND CUDA-registered pinned host memory.
void deallocate(void *buffer, std::size_t size)
Deallocate memory previously allocated by this allocator.
void * allocate(std::size_t size)
Allocate page-aligned CUDA-registered pinned host memory.
Allocator for CUDA pinned host memory.
void deallocate(void *buffer, std::size_t size)
Deallocate memory previously allocated by this allocator.
void * allocate(std::size_t size)
Allocate CUDA pinned host memory.
Allocator for page-aligned host memory.
void deallocate(void *buffer, std::size_t size)
Deallocate memory previously allocated by this allocator.
void * allocate(std::size_t size)
Allocate page-aligned host memory.
static std::size_t bounce_buffer_size()
Get the size of the bounce buffer used to stage data in host memory.
KvikIO namespace.
Definition: batch.hpp:16