20 #include <rmm/detail/error.hpp>
21 #include <rmm/detail/export.hpp>
22 #include <rmm/detail/runtime_async_alloc.hpp>
23 #include <rmm/detail/thrust_namespace.h>
27 #include <cuda/std/type_traits>
28 #include <cuda_runtime_api.h>
34 namespace RMM_NAMESPACE {
61 none = cudaMemHandleTypeNone,
62 posix_file_descriptor =
63 cudaMemHandleTypePosixFileDescriptor,
66 cudaMemHandleTypeWin32,
67 win32_kmt = cudaMemHandleTypeWin32Kmt,
107 std::optional<std::size_t> release_threshold = {},
108 std::optional<allocation_handle_type> export_handle_type = {})
111 RMM_EXPECTS(rmm::detail::runtime_async_alloc::is_supported(),
112 "cudaMallocAsync not supported with this CUDA driver/runtime version");
115 cudaMemPoolProps pool_props{};
116 pool_props.allocType = cudaMemAllocationTypePinned;
117 pool_props.handleTypes =
static_cast<cudaMemAllocationHandleType
>(
118 export_handle_type.value_or(allocation_handle_type::none));
120 #if defined(CUDA_VERSION) && CUDA_VERSION >= RMM_MIN_HWDECOMPRESS_CUDA_DRIVER_VERSION
122 if (rmm::detail::runtime_async_alloc::is_hwdecompress_supported()) {
123 pool_props.usage =
static_cast<unsigned short>(mempool_usage::hw_decompress);
128 rmm::detail::runtime_async_alloc::is_export_handle_type_supported(pool_props.handleTypes),
129 "Requested IPC memory handle type not supported");
130 pool_props.location.type = cudaMemLocationTypeDevice;
132 cudaMemPool_t cuda_pool_handle{};
133 RMM_CUDA_TRY(cudaMemPoolCreate(&cuda_pool_handle, &pool_props));
134 pool_ = cuda_async_view_memory_resource{cuda_pool_handle};
139 uint64_t threshold = release_threshold.value_or(total);
141 cudaMemPoolSetAttribute(pool_handle(), cudaMemPoolAttrReleaseThreshold, &threshold));
145 auto const pool_size = initial_pool_size.value_or(free / 2);
// Returns the underlying native handle to the CUDA pool.
// Forwards to pool_.pool_handle(); declared noexcept and const, so it does not
// modify this resource.
155 [[nodiscard]] cudaMemPool_t
pool_handle() const noexcept {
return pool_.pool_handle(); }
159 RMM_ASSERT_CUDA_SUCCESS(cudaMemPoolDestroy(pool_handle()));
// This resource is neither copyable nor movable: all four special member
// functions below are explicitly deleted.
// NOTE(review): presumably because the pool handle is destroyed with
// cudaMemPoolDestroy on destruction, so exactly one object must own it — confirm.

// Copy construction is disabled.
161 cuda_async_memory_resource(cuda_async_memory_resource
const&) =
delete;
// Move construction is disabled.
162 cuda_async_memory_resource(cuda_async_memory_resource&&) =
delete;
// Copy assignment is disabled.
163 cuda_async_memory_resource& operator=(cuda_async_memory_resource
const&) =
delete;
// Move assignment is disabled.
164 cuda_async_memory_resource& operator=(cuda_async_memory_resource&&) =
delete;
167 cuda_async_view_memory_resource pool_{};
181 ptr = pool_.allocate(bytes, stream);
195 pool_.deallocate(ptr, bytes, stream);
205 [[nodiscard]]
bool do_is_equal(device_memory_resource
const& other)
const noexcept
override
207 auto const* async_mr =
dynamic_cast<cuda_async_memory_resource const*
>(&other);
208 return (async_mr !=
nullptr) && (this->pool_handle() == async_mr->pool_handle());
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:39
device_memory_resource derived class that uses cudaMallocAsync/cudaFreeAsync for allocation/deallocation.
Definition: cuda_async_memory_resource.hpp:46
allocation_handle_type
Flags for specifying memory allocation handle types.
Definition: cuda_async_memory_resource.hpp:60
cuda_async_memory_resource(std::optional< std::size_t > initial_pool_size={}, std::optional< std::size_t > release_threshold={}, std::optional< allocation_handle_type > export_handle_type={})
Constructs a cuda_async_memory_resource with the optionally specified initial pool size and release threshold.
Definition: cuda_async_memory_resource.hpp:106
mempool_usage
Flags for specifying memory pool usage.
Definition: cuda_async_memory_resource.hpp:83
cudaMemPool_t pool_handle() const noexcept
Returns the underlying native handle to the CUDA pool.
Definition: cuda_async_memory_resource.hpp:155
Base class for all librmm device memory allocation.
Definition: device_memory_resource.hpp:92
std::pair< std::size_t, std::size_t > available_device_memory()
Returns the available and total device memory in bytes for the current device.
cuda_device_id get_current_cuda_device()
Returns a cuda_device_id for the current device.
static constexpr cuda_stream_view cuda_stream_default
Static cuda_stream_view of the default stream (stream 0), for convenience.
Definition: cuda_stream_view.hpp:122
constexpr value_type value() const noexcept
The wrapped integer value.
Definition: cuda_device.hpp:54