18 #include <rmm/cuda_device.hpp>
19 #include <rmm/cuda_stream_view.hpp>
20 #include <rmm/detail/cuda_util.hpp>
21 #include <rmm/detail/dynamic_load_runtime.hpp>
22 #include <rmm/detail/error.hpp>
23 #include <rmm/mr/device/cuda_async_view_memory_resource.hpp>
24 #include <rmm/mr/device/device_memory_resource.hpp>
26 #include <thrust/optional.h>
28 #include <cuda_runtime_api.h>
33 #if CUDART_VERSION >= 11020 // 11.2 introduced cudaMallocAsync
34 #ifndef RMM_DISABLE_CUDA_MALLOC_ASYNC
35 #define RMM_CUDA_MALLOC_ASYNC_SUPPORT
// NOTE(review): constructor fragment — the signature start (and an
// `initial_pool_size` parameter it references below) lies outside this chunk,
// and several intermediate lines are omitted. Code left byte-identical.
// Optional cap on pool size retained before memory is released back to the
// OS; defaults (below) to total device memory when not provided.
84 thrust::optional<std::size_t> release_threshold = {},
// Optional IPC/export handle type for sharing pool allocations across processes.
85 thrust::optional<allocation_handle_type> export_handle_type = {})
87 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
// Fail fast if the installed driver/runtime cannot service cudaMallocAsync,
// even though the toolkit this was built with supports it.
89 RMM_EXPECTS(rmm::detail::async_alloc::is_supported(),
90 "cudaMallocAsync not supported with this CUDA driver/runtime version");
// Describe the explicit pool: pinned allocation type, resident on the
// current device (location set below).
93 cudaMemPoolProps pool_props{};
94 pool_props.allocType = cudaMemAllocationTypePinned;
// NOTE(review): the cast's operand (presumably derived from
// export_handle_type) is on lines omitted from this chunk.
95 pool_props.handleTypes =
static_cast<cudaMemAllocationHandleType
>(
// Reject export handle types the driver cannot actually provide.
97 RMM_EXPECTS(rmm::detail::async_alloc::is_export_handle_type_supported(pool_props.handleTypes),
98 "Requested IPC memory handle type not supported");
99 pool_props.location.type = cudaMemLocationTypeDevice;
100 pool_props.location.id = rmm::detail::current_device().
value();
// Create the pool and hand ownership semantics to the view resource member;
// the matching cudaMemPoolDestroy happens in the destructor.
101 cudaMemPool_t cuda_pool_handle{};
102 RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&cuda_pool_handle, &pool_props));
103 pool_ = cuda_async_view_memory_resource{cuda_pool_handle};
// On drivers older than 11.5, disable opportunistic reuse on the pool.
// NOTE(review): `disabled` is declared on a line omitted from this chunk;
// presumably this works around a pre-11.5 driver defect — confirm upstream.
108 int driver_version{};
109 RMM_CUDA_TRY(cudaDriverGetVersion(&driver_version));
110 constexpr
auto min_async_version{11050};
111 if (driver_version < min_async_version) {
113 RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolSetAttribute(
114 pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled));
// Default the release threshold to all of device memory so the pool never
// shrinks unless the caller asked for a lower cap.
117 auto const [free, total] = rmm::detail::available_device_memory();
120 uint64_t threshold = release_threshold.value_or(total);
121 RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolSetAttribute(
122 pool_handle(), cudaMemPoolAttrReleaseThreshold, &threshold));
// Prime the pool: allocate-then-free the requested initial size (default:
// half of currently-free device memory) so the pool holds that much memory.
126 auto const pool_size = initial_pool_size.value_or(free / 2);
127 auto* ptr = do_allocate(pool_size, cuda_stream_default);
128 do_deallocate(ptr, pool_size, cuda_stream_default);
// Build-time failure branch (directive on an omitted line) for toolkits
// without cudaMallocAsync support.
131 "cudaMallocAsync not supported by the version of the CUDA Toolkit used for this build");
135 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
140 [[nodiscard]] cudaMemPool_t pool_handle() const noexcept {
return pool_.pool_handle(); }
// NOTE(review): destructor body fragment — the destructor's signature and
// braces lie outside this chunk. Destroys the pool created in the
// constructor; uses the assert macro (not RMM_CUDA_TRY), presumably because
// destructors must not throw — confirm against RMM error-handling policy.
145 #if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT)
146 RMM_ASSERT_CUDA_SUCCESS(rmm::detail::async_alloc::cudaMemPoolDestroy(pool_handle()));
170 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
// NOTE(review): do_allocate body fragment — signature and surrounding lines
// are outside this chunk. Delegates the stream-ordered allocation to the
// pool view resource (which targets this resource's explicit CUDA pool).
187 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
188 ptr = pool_.
allocate(bytes, stream);
// NOTE(review): do_deallocate body fragment — signature outside this chunk.
// Returns the block to the pool, ordered on `stream`; size/stream must match
// the allocation's contract per the stream-ordered allocator semantics.
205 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
206 pool_.deallocate(ptr, size, stream);
// Equality comparison with another device_memory_resource.
// NOTE(review): the declaration of `async_mr` (presumably a dynamic_cast of
// &other to this resource type) and the #else/#endif/braces are on lines
// omitted from this chunk; code left byte-identical.
223 [[nodiscard]]
bool do_is_equal(device_memory_resource
const& other)
const noexcept
override
226 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
// With async support: equal only if `other` is the same resource type AND
// shares the same underlying CUDA pool handle.
227 return (async_mr !=
nullptr) && (this->pool_handle() == async_mr->pool_handle());
// Without async support: type match alone decides equality.
229 return async_mr !=
nullptr;
// NOTE(review): fragment — parameter list and closing lines are outside this
// chunk. Always reports {0, 0}: free/total bytes are not tracked for this
// resource (the visible return is unconditional).
240 [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
243 return std::make_pair(0, 0);