nvtx.hpp
1 
5 #pragma once
6 
7 #include <source_location>
8 #include <stdexcept>
9 #include <string>
10 #include <string_view>
11 #include <type_traits>
12 
13 #include <nvtx3/nvtx3.hpp>
14 
15 #include <rapidsmpf/utils/misc.hpp>
16 
17 namespace rapidsmpf::detail {
18 
26 template <typename T>
27  requires std::is_integral_v<T>
28 [[nodiscard]] std::int64_t convert_to_64bit(T value) {
29  if constexpr (std::numeric_limits<T>::max()
30  > std::numeric_limits<std::int64_t>::max())
31  {
32  if (value > std::numeric_limits<std::int64_t>::max()) {
33  throw std::overflow_error(
34  "convert_to_64bit(x): x too large to fit std::int64_t"
35  );
36  }
37  }
38  return rapidsmpf::safe_cast<std::int64_t>(value);
39 }
40 
48 template <typename T>
49  requires std::is_floating_point_v<T>
50 [[nodiscard]] double convert_to_64bit(T value) {
51  return double(value);
52 }
53 
69 [[nodiscard]] constexpr std::string_view extract_func_name(
70  std::string_view pretty
71 ) noexcept {
72  // 1. Find the end boundary (either '(' or the end of the string)
73  auto const paren = pretty.find('(');
74  auto const end_pos = (paren == std::string_view::npos) ? pretty.size() : paren;
75 
76  // 2. Look for the last space before that boundary
77  auto const space = pretty.rfind(' ', end_pos);
78 
79  // 3. If no space is found, the name starts at 0.
80  // Otherwise, start right after the space.
81  auto const start_pos = (space == std::string_view::npos) ? 0 : space + 1;
82 
83  return pretty.substr(start_pos, end_pos - start_pos);
84 }
85 
86 } // namespace rapidsmpf::detail
87 
/**
 * @brief Tag type for rapidsmpf's NVTX domain.
 *
 * Used as the domain template argument of the `nvtx3::*_in<>` types
 * referenced by the macros in this header.
 */
struct rapidsmpf_domain {
    static constexpr char const* name{"rapidsmpf"};  ///< nvtx domain name
};
94 
// Creates a function-local static nvtx3 registered string for `msg` and yields a
// reference to it. The static lives inside an immediately invoked lambda, so its
// name cannot clash with other registered strings defined in the same scope.
#define RAPIDSMPF_REGISTER_STRING(msg) \
    [](char const* text) -> auto& { \
        static nvtx3::registered_string_in<rapidsmpf_domain> registered{text}; \
        return registered; \
    }(msg)
102 
// Creates a static, registered string named after the enclosing function.
//
// std::source_location::current().function_name() yields the decorated
// signature (return type + class::method + parameter list);
// rapidsmpf::detail::extract_func_name() reduces it to the qualified name,
// e.g. "MyClass::my_method" for member functions and "my_function" for free
// functions.
//
// std::source_location::current() appears as the argument of the immediately
// invoked lambda, so it is evaluated at the call site (the enclosing function)
// and not inside the lambda body. The extracted name is stored in a static
// std::string on first use, which gives nvtx3::registered_string_in a stable,
// null-terminated pointer.
#define RAPIDSMPF_REGISTER_FUNC_STRING \
    [](char const* signature) -> auto& { \
        static const std::string func_name{ \
            rapidsmpf::detail::extract_func_name(signature) \
        }; \
        static nvtx3::registered_string_in<rapidsmpf_domain> registered{ \
            func_name.c_str() \
        }; \
        return registered; \
    }(std::source_location::current().function_name())
124 
// Implements the function range macro with a payload value.
//
// Declares a scoped NVTX range named after the enclosing function and carrying
// `val` (converted by rapidsmpf::detail::convert_to_64bit) as payload.
//
// The static_assert strips cv-qualifiers and references from decltype(val), so
// arithmetic lvalues declared as references (e.g. `int const& n`) are accepted
// just like plain values.
#define RAPIDSMPF_NVTX_FUNC_RANGE_IMPL_WITH_VAL(val) \
    static_assert( \
        std::is_arithmetic_v<std::remove_cvref_t<decltype(val)>>, \
        "Value must be integral or floating point type" \
    ); \
    nvtx3::scoped_range_in<rapidsmpf_domain> RAPIDSMPF_CONCAT( \
        _rapidsmpf_nvtx_range, __LINE__ \
    ) { \
        nvtx3::event_attributes { \
            RAPIDSMPF_REGISTER_FUNC_STRING, nvtx3::payload { \
                rapidsmpf::detail::convert_to_64bit(val) \
            } \
        } \
    }
140 
// Implements the function range macro without a payload value: declares a
// scoped NVTX range named after the enclosing function.
#define RAPIDSMPF_NVTX_FUNC_RANGE_IMPL_WITHOUT_VAL() \
    nvtx3::scoped_range_in<rapidsmpf_domain> \
        RAPIDSMPF_CONCAT(_rapidsmpf_nvtx_range, __LINE__) { \
        nvtx3::event_attributes { RAPIDSMPF_REGISTER_FUNC_STRING } \
    }
150 
// Selector for 0 vs 1 arguments: expands to its third argument.
#define RAPIDSMPF_GET_MACRO_FUNC(UNUSED_HEAD, MAYBE_VAL, SELECTED, ...) SELECTED

// Forwards the arguments to the matching implementation macro. A placeholder is
// always passed first; __VA_OPT__ adds the separating comma only when an
// argument is present, so one argument shifts the "with value" macro into the
// selected slot and no argument leaves the "without value" macro there.
#define RAPIDSMPF_NVTX_FUNC_RANGE_IMPL(...) \
    RAPIDSMPF_GET_MACRO_FUNC( \
        unused __VA_OPT__(, ) __VA_ARGS__, \
        RAPIDSMPF_NVTX_FUNC_RANGE_IMPL_WITH_VAL, \
        RAPIDSMPF_NVTX_FUNC_RANGE_IMPL_WITHOUT_VAL \
    ) \
    (__VA_ARGS__)

// Declares an NVTX range for the enclosing function, with an optional
// arithmetic payload value as the single argument.
#define RAPIDSMPF_NVTX_FUNC_RANGE(...) RAPIDSMPF_NVTX_FUNC_RANGE_IMPL(__VA_ARGS__)
191 
// Implements the scoped range macro with a payload value: declares a scoped
// NVTX range labelled `msg` and carrying `val` (converted by
// rapidsmpf::detail::convert_to_64bit) as payload.
#define RAPIDSMPF_NVTX_SCOPED_RANGE_IMPL_WITH_VAL(msg, val) \
    nvtx3::scoped_range_in<rapidsmpf_domain> \
        RAPIDSMPF_CONCAT(_rapidsmpf_nvtx_range, __LINE__) { \
        nvtx3::event_attributes { \
            RAPIDSMPF_REGISTER_STRING(msg), \
            nvtx3::payload { rapidsmpf::detail::convert_to_64bit(val) } \
        } \
    }
203 
// Implements the scoped range macro without a payload value: declares a scoped
// NVTX range labelled `msg`.
#define RAPIDSMPF_NVTX_SCOPED_RANGE_IMPL_WITHOUT_VAL(msg) \
    nvtx3::scoped_range_in<rapidsmpf_domain> \
        RAPIDSMPF_CONCAT(_rapidsmpf_nvtx_range, __LINE__) { \
        nvtx3::event_attributes { RAPIDSMPF_REGISTER_STRING(msg) } \
    }
213 
// Selector for 1 vs 2 arguments: expands to its third argument.
#define RAPIDSMPF_GET_MACRO(FIRST, SECOND, SELECTED, ...) SELECTED

// Forwards the arguments to the matching implementation macro: two arguments
// shift the "with value" macro into the selected slot, a single argument
// leaves the "without value" macro there.
#define RAPIDSMPF_NVTX_SCOPED_RANGE_IMPL(...) \
    RAPIDSMPF_GET_MACRO(__VA_ARGS__, RAPIDSMPF_NVTX_SCOPED_RANGE_IMPL_WITH_VAL, RAPIDSMPF_NVTX_SCOPED_RANGE_IMPL_WITHOUT_VAL) \
    (__VA_ARGS__)

// Declares a scoped NVTX range labelled by the first argument, with an optional
// payload value as the second argument.
#define RAPIDSMPF_NVTX_SCOPED_RANGE(...) RAPIDSMPF_NVTX_SCOPED_RANGE_IMPL(__VA_ARGS__)
248 
// Verbose variant of RAPIDSMPF_NVTX_SCOPED_RANGE: expands to nothing unless
// RAPIDSMPF_VERBOSE_INFO evaluates to non-zero.
#if !RAPIDSMPF_VERBOSE_INFO
#define RAPIDSMPF_NVTX_SCOPED_RANGE_VERBOSE(...)
#else
#define RAPIDSMPF_NVTX_SCOPED_RANGE_VERBOSE(...) RAPIDSMPF_NVTX_SCOPED_RANGE(__VA_ARGS__)
#endif
275 
// Implements the marker macro: emits an NVTX mark in the rapidsmpf domain,
// labelled `msg` and carrying `val` (converted by
// rapidsmpf::detail::convert_to_64bit) as payload.
#define RAPIDSMPF_NVTX_MARKER_IMPL(msg, val) \
    nvtx3::mark_in<rapidsmpf_domain>( \
        nvtx3::event_attributes{ \
            RAPIDSMPF_REGISTER_STRING(msg), \
            nvtx3::payload{rapidsmpf::detail::convert_to_64bit(val)} \
        } \
    )

// Emits an NVTX marker with the given message and payload value.
#define RAPIDSMPF_NVTX_MARKER(message, payload) RAPIDSMPF_NVTX_MARKER_IMPL(message, payload)
293 
// Verbose variant of RAPIDSMPF_NVTX_MARKER: expands to nothing unless
// RAPIDSMPF_VERBOSE_INFO evaluates to non-zero.
#if !RAPIDSMPF_VERBOSE_INFO
#define RAPIDSMPF_NVTX_MARKER_VERBOSE(message, payload)
#else
#define RAPIDSMPF_NVTX_MARKER_VERBOSE(message, payload) RAPIDSMPF_NVTX_MARKER_IMPL(message, payload)
#endif
// Symbol index (Doxygen cross-reference summary):
//
//   requires std::is_integral_v<T> std::int64_t convert_to_64bit(T value)
//     Convert an integral value to a 64-bit signed integer.
//     Definition: nvtx.hpp:28
//
//   constexpr std::string_view extract_func_name(std::string_view pretty) noexcept
//     Extract the qualified function name from a __PRETTY_FUNCTION__ string.
//     Definition: nvtx.hpp:69
//
//   rapidsmpf_domain
//     Tag type for rapidsmpf's NVTX domain.
//     Definition: nvtx.hpp:91
//
//   static constexpr char const* name
//     nvtx domain name
//     Definition: nvtx.hpp:92