All Classes Files Functions Enumerations Enumerator Pages
utils.hpp
1 /*
2  * Copyright (c) 2021-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <future>
#include <limits>
#include <optional>
#include <stdexcept>
#include <tuple>
#include <type_traits>
25 
26 #ifdef KVIKIO_CUDA_FOUND
27 #include <nvtx3/nvtx3.hpp>
28 #endif
29 
30 #include <kvikio/shim/cuda.hpp>
31 
32 namespace kvikio {
33 
// Page size, in bytes, as defined by cuFile: 4 KiB.
inline constexpr std::size_t page_size{4096};
36 
37 [[nodiscard]] off_t convert_size2off(std::size_t x);
38 
39 [[nodiscard]] ssize_t convert_size2ssize(std::size_t x);
40 
41 [[nodiscard]] CUdeviceptr convert_void2deviceptr(const void* devPtr);
42 
/**
 * @brief Convert an integral value to `std::int64_t`.
 *
 * Overload selected for integral `T`.
 *
 * @tparam T Integral type of the input.
 * @param value Value to convert.
 * @return `value` as `std::int64_t`.
 *
 * @throws std::overflow_error if `value` is larger than the maximum of `std::int64_t`.
 */
template <typename T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
[[nodiscard]] std::int64_t convert_to_64bit(T value)
{
  constexpr auto int64_max = std::numeric_limits<std::int64_t>::max();

  // Only types whose range reaches past int64_max need a run-time check; for
  // every other type the check is discarded at compile time.
  constexpr bool may_exceed_int64 = std::numeric_limits<T>::max() > int64_max;

  if constexpr (may_exceed_int64) {
    if (value > int64_max) {
      throw std::overflow_error("convert_to_64bit(x): x too large to fit std::int64_t");
    }
  }
  return static_cast<std::int64_t>(value);
}
56 
/**
 * @brief Convert a floating-point value to `double`.
 *
 * Overload selected for floating-point `T`.
 *
 * @tparam T Floating-point type of the input.
 * @param value Value to convert.
 * @return `value` as `double`.
 */
template <typename T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>
[[nodiscard]] double convert_to_64bit(T value)
{
  return static_cast<double>(value);
}
65 
/**
 * @brief Report whether `ptr` refers to host memory.
 *
 * When KVIKIO_CUDA_FOUND is defined the function is only declared here and its
 * definition lives elsewhere. Otherwise a `constexpr` fallback is provided that
 * answers `true` for every pointer.
 *
 * @param ptr Pointer to query (ignored by the fallback).
 * @return `true` if `ptr` is host memory; always `true` in the fallback.
 */
#ifdef KVIKIO_CUDA_FOUND
bool is_host_memory(const void* ptr);
#else
// [[maybe_unused]] keeps -Wunused-parameter quiet while preserving the parameter name.
constexpr bool is_host_memory([[maybe_unused]] const void* ptr) { return true; }
#endif
79 
86 [[nodiscard]] int get_device_ordinal_from_pointer(CUdeviceptr dev_ptr);
87 
96 [[nodiscard]] KVIKIO_EXPORT CUcontext get_primary_cuda_context(int ordinal);
97 
104 [[nodiscard]] std::optional<CUcontext> get_context_associated_pointer(CUdeviceptr dev_ptr);
105 
112 [[nodiscard]] bool current_context_can_access_pointer(CUdeviceptr dev_ptr);
113 
130 [[nodiscard]] CUcontext get_context_from_pointer(const void* devPtr);
131 
136  private:
137  CUcontext _ctx;
138 
139  public:
140  PushAndPopContext(CUcontext ctx);
141  PushAndPopContext(const PushAndPopContext&) = delete;
142  PushAndPopContext& operator=(PushAndPopContext const&) = delete;
144  PushAndPopContext&& operator=(PushAndPopContext&&) = delete;
146 };
147 
148 // Find the base and offset of the memory allocation `devPtr` is in
149 std::tuple<void*, std::size_t, std::size_t> get_alloc_info(const void* devPtr,
150  CUcontext* ctx = nullptr);
151 
/**
 * @brief Check whether a future has finished waiting.
 *
 * Performs a wait with a zero-length timeout and reports anything other than
 * `std::future_status::timeout` as done. A future whose status is
 * `std::future_status::deferred` is therefore reported as done as well.
 *
 * @tparam T Future-like type providing `wait_for()`.
 * @param future The future to query.
 * @return `true` unless the zero-length wait timed out.
 */
template <typename T>
bool is_future_done(const T& future)
{
  auto const status = future.wait_for(std::chrono::seconds::zero());
  return !(status == std::future_status::timeout);
}
157 
158 #ifdef KVIKIO_CUDA_FOUND
/**
 * @brief Tag type used as the domain parameter of the NVTX templates in this header.
 *
 * `name` holds the domain name string, "libkvikio".
 */
struct libkvikio_domain {
  static constexpr char const* name = "libkvikio";
};
165 
// Paste two tokens together. The extra helper level makes sure that macro
// arguments such as __LINE__ are expanded before they are pasted.
#define KVIKIO_CONCAT_HELPER(lhs, rhs) lhs##rhs
#define KVIKIO_CONCAT(lhs, rhs)        KVIKIO_CONCAT_HELPER(lhs, rhs)
169 
// The helper macros below spell out `kvikio::` for every name they take from
// this header, so they also resolve when expanded outside of namespace kvikio.

// Macro to create a static, registered string that will not have a name conflict with any
// registered string defined in the same scope.
//
// Every expansion is a distinct lambda and therefore owns its own function-local static.
// That static is constructed from `msg` the first time the expansion is executed and the
// same object is returned on every later execution.
#define KVIKIO_REGISTER_STRING(msg)                                                \
  [](const char* a_msg) -> auto& {                                                 \
    static nvtx3::registered_string_in<kvikio::libkvikio_domain> a_reg_str{a_msg}; \
    return a_reg_str;                                                              \
  }(msg)

// Implementation of KVIKIO_NVTX_FUNC_RANGE: forwards to NVTX3_FUNC_RANGE_IN for the
// libkvikio domain.
#define KVIKIO_NVTX_FUNC_RANGE_IMPL() NVTX3_FUNC_RANGE_IN(kvikio::libkvikio_domain)

// Implementation of KVIKIO_NVTX_SCOPED_RANGE: declares a local
// nvtx3::scoped_range_in object named `_kvikio_nvtx_range<line number>`. The event
// attributes carry the registered `msg` and `val` converted with convert_to_64bit() as
// payload. The expansion is a declaration without the trailing semicolon.
#define KVIKIO_NVTX_SCOPED_RANGE_IMPL(msg, val)                                                \
  nvtx3::scoped_range_in<kvikio::libkvikio_domain> KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \
  {                                                                                            \
    nvtx3::event_attributes                                                                    \
    {                                                                                          \
      KVIKIO_REGISTER_STRING(msg), nvtx3::payload { kvikio::convert_to_64bit(val) }            \
    }                                                                                          \
  }

// Implementation of KVIKIO_NVTX_MARKER: calls nvtx3::mark_in for the libkvikio domain with
// the registered `msg` and `val` converted with convert_to_64bit() as payload.
#define KVIKIO_NVTX_MARKER_IMPL(msg, val)                          \
  nvtx3::mark_in<kvikio::libkvikio_domain>(nvtx3::event_attributes{ \
    KVIKIO_REGISTER_STRING(msg), nvtx3::payload{kvikio::convert_to_64bit(val)}})
193 
194 #endif
195 
// KVIKIO_NVTX_FUNC_RANGE()
//
// With KVIKIO_CUDA_FOUND defined this expands to KVIKIO_NVTX_FUNC_RANGE_IMPL(), i.e.
// NVTX3_FUNC_RANGE_IN for the libkvikio domain. Without it the macro accepts any
// arguments and expands to an empty statement.
#if defined(KVIKIO_CUDA_FOUND)
#define KVIKIO_NVTX_FUNC_RANGE() KVIKIO_NVTX_FUNC_RANGE_IMPL()
#else
#define KVIKIO_NVTX_FUNC_RANGE(...) do { } while (0)
#endif
218 
// KVIKIO_NVTX_SCOPED_RANGE(msg, val)
//
// With KVIKIO_CUDA_FOUND defined this expands to KVIKIO_NVTX_SCOPED_RANGE_IMPL(msg, val),
// which declares a local NVTX scoped-range object whose name is derived from the current
// line number. Without it the macro expands to an empty statement and neither argument
// is evaluated.
#if defined(KVIKIO_CUDA_FOUND)
#define KVIKIO_NVTX_SCOPED_RANGE(msg, val) KVIKIO_NVTX_SCOPED_RANGE_IMPL(msg, val)
#else
#define KVIKIO_NVTX_SCOPED_RANGE(msg, val) do { } while (0)
#endif
240 
// KVIKIO_NVTX_MARKER(message, payload)
//
// With KVIKIO_CUDA_FOUND defined this expands to KVIKIO_NVTX_MARKER_IMPL(message, payload),
// which emits an NVTX mark in the libkvikio domain. Without it the macro expands to an
// empty statement and neither argument is evaluated.
#if defined(KVIKIO_CUDA_FOUND)
#define KVIKIO_NVTX_MARKER(message, payload) KVIKIO_NVTX_MARKER_IMPL(message, payload)
#else
#define KVIKIO_NVTX_MARKER(message, payload) do { } while (0)
#endif
265 
266 } // namespace kvikio
Push CUDA context on creation and pop it on destruction.
Definition: utils.hpp:135