24 #include <type_traits>
26 #ifdef KVIKIO_CUDA_FOUND
27 #include <nvtx3/nvtx3.hpp>
30 #include <kvikio/shim/cuda.hpp>
/// Page size constant used by this header: 4096 (presumably bytes — confirm against the call sites).
inline constexpr std::size_t page_size = 4096;
/**
 * @brief Convert a `std::size_t` value to `off_t`.
 *
 * Declaration only; the definition lives elsewhere.
 * NOTE(review): how values that do not fit in `off_t` are handled is not visible here — confirm
 * against the definition.
 *
 * @param x Value to convert.
 * @return `x` expressed as `off_t`.
 */
[[nodiscard]] off_t convert_size2off(std::size_t x);
/**
 * @brief Convert a `std::size_t` value to `ssize_t`.
 *
 * Declaration only; the definition lives elsewhere.
 * NOTE(review): how values that do not fit in `ssize_t` are handled is not visible here — confirm
 * against the definition.
 *
 * @param x Value to convert.
 * @return `x` expressed as `ssize_t`.
 */
[[nodiscard]] ssize_t convert_size2ssize(std::size_t x);
/**
 * @brief Convert a `const void*` into a `CUdeviceptr`.
 *
 * Declaration only; the definition lives elsewhere.
 *
 * @param devPtr Pointer to convert (by its name, presumably a device pointer — confirm).
 * @return The pointer expressed as a `CUdeviceptr`.
 */
41 [[nodiscard]] CUdeviceptr convert_void2deviceptr(
const void* devPtr);
/**
 * @brief Convert an integral value to `std::int64_t`, rejecting values that do not fit.
 *
 * This overload only participates in overload resolution when `T` is an integral type.
 * The range check is compiled in only for types whose maximum exceeds the maximum of
 * `std::int64_t` (e.g. `std::uint64_t`); all other integral types are converted directly.
 *
 * @tparam T Integral source type.
 * @param value Value to convert.
 * @return `value` as a `std::int64_t`.
 * @throws std::overflow_error if `value` is larger than the largest `std::int64_t`.
 */
template <typename T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
[[nodiscard]] std::int64_t convert_to_64bit(T value)
{
  // Only types that can actually hold something larger than INT64_MAX need a runtime check.
  if constexpr (std::numeric_limits<T>::max() > std::numeric_limits<std::int64_t>::max()) {
    if (value > std::numeric_limits<std::int64_t>::max()) {
      throw std::overflow_error("convert_to_64bit(x): x too large to fit std::int64_t");
    }
  }
  return static_cast<std::int64_t>(value);
}
/**
 * @brief Floating-point overload of `convert_to_64bit`.
 *
 * Enabled only when `T` is a floating-point type; takes `value` by value and returns `double`.
 * NOTE(review): the function body is not visible in this part of the file, so whether the
 * conversion is a plain widening cast should be confirmed against the definition.
 */
60 template <
typename T, std::enable_if_t<std::is_
floating_po
int_v<T>>* =
nullptr>
61 [[nodiscard]]
double convert_to_64bit(T value)
/**
 * @brief Report whether `ptr` is host memory.
 *
 * When `KVIKIO_CUDA_FOUND` is defined, the function is only declared here and is defined
 * elsewhere. Without CUDA support it is a `constexpr` stub that reports every pointer as
 * host memory.
 *
 * @param ptr Pointer to classify.
 * @return `true` if `ptr` is host memory; always `true` in builds without CUDA support.
 */
#ifdef KVIKIO_CUDA_FOUND
bool is_host_memory(const void* ptr);
#else
constexpr bool is_host_memory([[maybe_unused]] const void* ptr) { return true; }
#endif
/**
 * @brief Return an `int` device ordinal for the given `CUdeviceptr`.
 *
 * Declaration only; the definition lives elsewhere.
 * NOTE(review): presumably the ordinal of the device owning `dev_ptr` — confirm against the
 * definition, including what happens for pointers that are not device memory.
 *
 * @param dev_ptr Device pointer to query.
 * @return Device ordinal.
 */
86 [[nodiscard]]
int get_device_ordinal_from_pointer(CUdeviceptr dev_ptr);
/**
 * @brief Return a `CUcontext` for the device with the given ordinal.
 *
 * Declaration only (marked `KVIKIO_EXPORT`); the definition lives elsewhere.
 * NOTE(review): by its name this returns the device's primary context — confirm whether the
 * context is retained/cached by the definition.
 *
 * @param ordinal Device ordinal.
 * @return The CUDA context.
 */
96 [[nodiscard]] KVIKIO_EXPORT CUcontext get_primary_cuda_context(
int ordinal);
/**
 * @brief Look up a `CUcontext` for `dev_ptr`, returning it wrapped in `std::optional`.
 *
 * Declaration only; the definition lives elsewhere.
 * NOTE(review): presumably an empty optional means no context is associated with the pointer —
 * confirm against the definition.
 *
 * @param dev_ptr Device pointer to query.
 * @return The context, or an empty optional.
 */
104 [[nodiscard]] std::optional<CUcontext> get_context_associated_pointer(CUdeviceptr dev_ptr);
/**
 * @brief Report, as a `bool`, whether `dev_ptr` is accessible from the current CUDA context
 * (as the name states).
 *
 * Declaration only; the definition lives elsewhere.
 * NOTE(review): behaviour when no context is current is not visible here — confirm.
 *
 * @param dev_ptr Device pointer to test.
 * @return `true` if the current context can access the pointer.
 */
112 [[nodiscard]]
bool current_context_can_access_pointer(CUdeviceptr dev_ptr);
/**
 * @brief Return a `CUcontext` for the memory that `devPtr` points to.
 *
 * Declaration only; the definition lives elsewhere.
 * NOTE(review): the strategy used to pick the context (and any error raised when none is found)
 * is not visible here — confirm against the definition.
 *
 * @param devPtr Pointer to query (by its name, presumably device memory — confirm).
 * @return A CUDA context for the pointer.
 */
130 [[nodiscard]] CUcontext get_context_from_pointer(
const void* devPtr);
/**
 * @brief Return allocation information for `devPtr` as a `(void*, std::size_t, std::size_t)` tuple.
 *
 * Declaration only; the definition lives elsewhere.
 * NOTE(review): the meaning of the three tuple elements is not visible here (presumably base
 * address, allocation size and offset of `devPtr` within the allocation) — confirm.
 *
 * @param devPtr Pointer to query.
 * @param ctx Optional pointer to a `CUcontext`; defaults to `nullptr`. How it is used is not
 * visible here.
 * @return Tuple describing the allocation.
 */
149 std::tuple<void*, std::size_t, std::size_t> get_alloc_info(
const void* devPtr,
150 CUcontext* ctx =
nullptr);
/**
 * @brief Poll a future-like object without blocking.
 *
 * Performs a zero-duration `wait_for` and reports whether it returned anything other than
 * `std::future_status::timeout`. Note that a deferred future (`std::future_status::deferred`)
 * is therefore also reported as done.
 *
 * @tparam T Type providing `wait_for(duration)` that returns `std::future_status`,
 * e.g. `std::future`.
 * @param future Object to poll.
 * @return `true` unless the zero-duration wait timed out.
 */
template <typename T>
bool is_future_done(const T& future)
{
  return future.wait_for(std::chrono::seconds(0)) != std::future_status::timeout;
}
158 #ifdef KVIKIO_CUDA_FOUND
/**
 * @brief Tag type carrying the domain name string "libkvikio".
 *
 * Passed as the domain template argument to the nvtx3 types used by the NVTX macros in this file.
 */
struct libkvikio_domain {
  static constexpr char const* name{"libkvikio"};
};
// Token-pasting helpers. The extra level of indirection lets arguments that are themselves
// macros (e.g. __LINE__) be expanded before they are pasted together.
#define KVIKIO_CONCAT_HELPER(x, y) x##y
#define KVIKIO_CONCAT(x, y)        KVIKIO_CONCAT_HELPER(x, y)
// KVIKIO_REGISTER_STRING(msg): starts a lambda taking `const char* a_msg` and returning `auto&`,
// which holds a function-local static nvtx3::registered_string_in<libkvikio_domain> built from
// `a_msg`. Being static, the registered string is constructed once per expansion site.
// NOTE(review): the remainder of the macro (the return statement and the invocation of the
// lambda) is not visible here — confirm it returns `a_reg_str` and is called with `msg`.
172 #define KVIKIO_REGISTER_STRING(msg) \
173 [](const char* a_msg) -> auto& { \
174 static nvtx3::registered_string_in<libkvikio_domain> a_reg_str{a_msg}; \
// KVIKIO_NVTX_FUNC_RANGE_IMPL(): expands to NVTX3_FUNC_RANGE_IN with libkvikio_domain as the domain.
179 #define KVIKIO_NVTX_FUNC_RANGE_IMPL() NVTX3_FUNC_RANGE_IN(libkvikio_domain)
// KVIKIO_NVTX_SCOPED_RANGE_IMPL(msg, val): declares an nvtx3::scoped_range_in<libkvikio_domain>
// variable whose name is `_kvikio_nvtx_range` pasted with __LINE__, so that several ranges can
// be declared in one scope. Its event attributes carry the string registered through
// KVIKIO_REGISTER_STRING(msg) and a payload of convert_to_64bit(val).
// NOTE(review): some lines of this macro (the surrounding braces) are not visible here.
181 #define KVIKIO_NVTX_SCOPED_RANGE_IMPL(msg, val) \
182 nvtx3::scoped_range_in<libkvikio_domain> KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \
184 nvtx3::event_attributes \
186 KVIKIO_REGISTER_STRING(msg), nvtx3::payload { convert_to_64bit(val) } \
// KVIKIO_NVTX_MARKER_IMPL(msg, val): calls nvtx3::mark_in<libkvikio_domain> with event attributes
// made of the string registered through KVIKIO_REGISTER_STRING(msg) and a payload of
// convert_to_64bit(val).
190 #define KVIKIO_NVTX_MARKER_IMPL(msg, val) \
191 nvtx3::mark_in<libkvikio_domain>( \
192 nvtx3::event_attributes{KVIKIO_REGISTER_STRING(msg), nvtx3::payload{convert_to_64bit(val)}})
// KVIKIO_NVTX_FUNC_RANGE(): when KVIKIO_CUDA_FOUND is defined, forwards to
// KVIKIO_NVTX_FUNC_RANGE_IMPL(). A second, variadic definition follows.
// NOTE(review): the body of the variadic definition and the preprocessor lines separating the
// two definitions are not visible here — presumably it is the no-op fallback for builds without
// CUDA; confirm.
211 #ifdef KVIKIO_CUDA_FOUND
212 #define KVIKIO_NVTX_FUNC_RANGE() KVIKIO_NVTX_FUNC_RANGE_IMPL()
214 #define KVIKIO_NVTX_FUNC_RANGE(...) \
// KVIKIO_NVTX_SCOPED_RANGE(msg, val): when KVIKIO_CUDA_FOUND is defined, forwards both arguments
// to KVIKIO_NVTX_SCOPED_RANGE_IMPL. A second definition with the same parameters follows.
// NOTE(review): the body of the second definition and the preprocessor lines separating the two
// are not visible here — presumably it is the no-op fallback for builds without CUDA; confirm.
233 #ifdef KVIKIO_CUDA_FOUND
234 #define KVIKIO_NVTX_SCOPED_RANGE(msg, val) KVIKIO_NVTX_SCOPED_RANGE_IMPL(msg, val)
236 #define KVIKIO_NVTX_SCOPED_RANGE(msg, val) \
// KVIKIO_NVTX_MARKER(message, payload): when KVIKIO_CUDA_FOUND is defined, forwards both
// arguments to KVIKIO_NVTX_MARKER_IMPL. A second definition with the same parameters follows.
// NOTE(review): the body of the second definition and the preprocessor lines separating the two
// are not visible here — presumably it is the no-op fallback for builds without CUDA; confirm.
258 #ifdef KVIKIO_CUDA_FOUND
259 #define KVIKIO_NVTX_MARKER(message, payload) KVIKIO_NVTX_MARKER_IMPL(message, payload)
261 #define KVIKIO_NVTX_MARKER(message, payload) \
/// Pushes a CUDA context on creation and pops it on destruction.