error.hpp
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
#include <cerrno>
#include <cstring>
#include <stdexcept>
#include <string>
#include <system_error>

#include <kvikio/shim/cuda.hpp>
#include <kvikio/shim/cufile_h_wrapper.hpp>
14 
15 namespace kvikio {
16 
/**
 * @brief Exception type thrown by the KvikIO error-checking helpers by default.
 *
 * Behaves exactly like `std::runtime_error`; all of its constructors are inherited.
 */
struct CUfileException : std::runtime_error {
  using std::runtime_error::runtime_error;
};
20 
/**
 * @brief `std::system_error` specialization used to report failed system/library calls.
 *
 * The constructors are defined out of line. The overloads taking `err_code` receive the error
 * code explicitly; the message-only overloads presumably derive the code from the current
 * `errno` (NOTE(review): confirm against the definitions).
 */
class GenericSystemError : public std::system_error {
 public:
  // Message-only overloads.
  GenericSystemError(std::string const& msg);
  GenericSystemError(char const* msg);

  // Overloads with an explicit error code.
  GenericSystemError(int err_code, std::string const& msg);
  GenericSystemError(int err_code, char const* msg);

  GenericSystemError(GenericSystemError const&)            = default;
  GenericSystemError& operator=(GenericSystemError const&) = default;
  ~GenericSystemError() noexcept override                  = default;
};
31 
// Argument-count dispatch helpers: each macro skips a fixed number of leading arguments (two or
// three) and expands to the argument that immediately follows them. Callers pass
// `__VA_ARGS__` followed by a list of candidate macro names, so the number of user-supplied
// arguments determines which candidate ends up in the selected slot.
#define KVIKIO_VA_SELECT_2(_a, _b, SELECTED, ...) SELECTED
#define KVIKIO_VA_SELECT_3(_a, _b, _c, SELECTED, ...) SELECTED
34 
/**
 * @brief Check the result of a CUDA driver API call and throw if it reports an error.
 *
 * Usage:
 *   KVIKIO_CUDA_DRIVER_TRY(call);                  // throws kvikio::CUfileException
 *   KVIKIO_CUDA_DRIVER_TRY(call, exception_type);  // throws exception_type
 *
 * The check is forwarded to `kvikio::detail::cuda_driver_try` together with the line and file of
 * the call site.
 */
#define KVIKIO_CUDA_DRIVER_TRY_2(_expr, _exc_type) \
  do { \
    kvikio::detail::cuda_driver_try<_exc_type>(_expr, __LINE__, __FILE__); \
  } while (false)
#define KVIKIO_CUDA_DRIVER_TRY_1(_expr) KVIKIO_CUDA_DRIVER_TRY_2(_expr, kvikio::CUfileException)
// Dispatch on the number of arguments (one or two).
#define KVIKIO_CUDA_DRIVER_TRY(...) \
  KVIKIO_VA_SELECT_2(__VA_ARGS__, KVIKIO_CUDA_DRIVER_TRY_2, KVIKIO_CUDA_DRIVER_TRY_1) \
  (__VA_ARGS__)
70 
/**
 * @brief Check the `CUfileError_t` result of a cuFile API call and throw if it reports an error.
 *
 * Usage:
 *   KVIKIO_CUFILE_TRY(call);                  // throws kvikio::CUfileException
 *   KVIKIO_CUFILE_TRY(call, exception_type);  // throws exception_type
 *
 * The check is forwarded to `kvikio::detail::cufile_try` together with the line and file of the
 * call site.
 */
#define KVIKIO_CUFILE_TRY_2(_expr, _exc_type) \
  do { \
    kvikio::detail::cufile_try<_exc_type>(_expr, __LINE__, __FILE__); \
  } while (false)
#define KVIKIO_CUFILE_TRY_1(_expr) KVIKIO_CUFILE_TRY_2(_expr, kvikio::CUfileException)
// Dispatch on the number of arguments (one or two).
#define KVIKIO_CUFILE_TRY(...) \
  KVIKIO_VA_SELECT_2(__VA_ARGS__, KVIKIO_CUFILE_TRY_2, KVIKIO_CUFILE_TRY_1) \
  (__VA_ARGS__)
106 
/**
 * @brief Check the byte count returned by a cuFile I/O call and throw if it is negative.
 *
 * Usage:
 *   KVIKIO_CUFILE_CHECK_BYTES_DONE(nbytes_done);                  // throws kvikio::CUfileException
 *   KVIKIO_CUFILE_CHECK_BYTES_DONE(nbytes_done, exception_type);  // throws exception_type
 *
 * The check is forwarded to `kvikio::detail::cufile_check_bytes_done` together with the line and
 * file of the call site.
 */
#define KVIKIO_CUFILE_CHECK_BYTES_DONE_2(_bytes, _exc_type) \
  do { \
    kvikio::detail::cufile_check_bytes_done<_exc_type>(_bytes, __LINE__, __FILE__); \
  } while (false)
#define KVIKIO_CUFILE_CHECK_BYTES_DONE_1(_bytes) \
  KVIKIO_CUFILE_CHECK_BYTES_DONE_2(_bytes, kvikio::CUfileException)
// Dispatch on the number of arguments (one or two).
#define KVIKIO_CUFILE_CHECK_BYTES_DONE(...) \
  KVIKIO_VA_SELECT_2( \
    __VA_ARGS__, KVIKIO_CUFILE_CHECK_BYTES_DONE_2, KVIKIO_CUFILE_CHECK_BYTES_DONE_1) \
  (__VA_ARGS__)
117 
/**
 * @brief Throw an exception carrying a message when a condition does not hold.
 *
 * Usage:
 *   KVIKIO_EXPECT(condition, message);                  // throws kvikio::CUfileException
 *   KVIKIO_EXPECT(condition, message, exception_type);  // throws exception_type
 *
 * The message expression is placed inside a lambda so that it is evaluated lazily: costly
 * expressions such as string concatenation or std::to_string only run once the condition has
 * failed, rather than unconditionally at every call site.
 */
#define KVIKIO_EXPECT_3(_cond, _message, _exc_type) \
  do { \
    if (!(_cond)) { \
      kvikio::detail::kvikio_fail<_exc_type>( \
        [&]() -> std::string { return _message; }, __LINE__, __FILE__); \
    } \
  } while (false)

#define KVIKIO_EXPECT_2(_cond, _message) KVIKIO_EXPECT_3(_cond, _message, kvikio::CUfileException)

// Dispatch on the number of arguments (two or three).
#define KVIKIO_EXPECT(...) \
  KVIKIO_VA_SELECT_3(__VA_ARGS__, KVIKIO_EXPECT_3, KVIKIO_EXPECT_2)(__VA_ARGS__)
163 
/**
 * @brief Unconditionally throw an exception carrying a message (e.g. on an erroneous code path).
 *
 * Usage:
 *   KVIKIO_FAIL(message);                  // throws kvikio::CUfileException
 *   KVIKIO_FAIL(message, exception_type);  // throws exception_type
 *
 * The message expression is wrapped in a lambda and evaluated inside
 * `kvikio::detail::kvikio_fail`, which also receives the line and file of the call site.
 */
#define KVIKIO_FAIL_2(_message, _exc_type) \
  kvikio::detail::kvikio_fail<_exc_type>( \
    [&]() -> std::string { return _message; }, __LINE__, __FILE__)

#define KVIKIO_FAIL_1(_message) KVIKIO_FAIL_2(_message, kvikio::CUfileException)

// Dispatch on the number of arguments (one or two).
#define KVIKIO_FAIL(...) KVIKIO_VA_SELECT_2(__VA_ARGS__, KVIKIO_FAIL_2, KVIKIO_FAIL_1)(__VA_ARGS__)
196 
/**
 * @brief Check the return value of a Linux system/library call and throw on failure.
 *
 * Usage:
 *   KVIKIO_SYSCALL_CHECK(return_value);
 *   KVIKIO_SYSCALL_CHECK(return_value, extra_msg);
 *   KVIKIO_SYSCALL_CHECK(return_value, extra_msg, error_value);
 *
 * All forms forward to `kvikio::detail::check_linux_call` together with the line and file of the
 * call site; omitted arguments take that function's defaults.
 */
#define KVIKIO_SYSCALL_CHECK_3(_ret, _extra, _err_value) \
  do { \
    kvikio::detail::check_linux_call(_ret, __LINE__, __FILE__, _extra, _err_value); \
  } while (false)
#define KVIKIO_SYSCALL_CHECK_2(_ret, _extra) \
  do { \
    kvikio::detail::check_linux_call(_ret, __LINE__, __FILE__, _extra); \
  } while (false)
#define KVIKIO_SYSCALL_CHECK_1(_ret) \
  do { \
    kvikio::detail::check_linux_call(_ret, __LINE__, __FILE__); \
  } while (false)
// Dispatch on the number of arguments (one, two or three).
#define KVIKIO_SYSCALL_CHECK(...) \
  KVIKIO_VA_SELECT_3( \
    __VA_ARGS__, KVIKIO_SYSCALL_CHECK_3, KVIKIO_SYSCALL_CHECK_2, KVIKIO_SYSCALL_CHECK_1) \
  (__VA_ARGS__)
252 
253 namespace detail {
/**
 * @brief Build a failure message and throw it as `Exception`.
 *
 * @tparam Exception Exception type to throw; must be constructible from `std::string`.
 * @tparam MsgFunc Callable whose result is convertible to `std::string`.
 * @param msg_func Callable producing the user message; invoked exactly once.
 * @param line_number Line number reported in the message.
 * @param filename File name reported in the message.
 *
 * @throws Exception Always. The text is
 * "KvikIO failure at: <filename>:<line_number>: <message>", where an empty message is replaced by
 * "(no message)".
 */
template <typename Exception, typename MsgFunc>
[[noreturn]] void kvikio_fail(MsgFunc&& msg_func, int line_number, char const* filename)
{
  std::string const user_msg = std::forward<MsgFunc>(msg_func)();
  std::string const location =
    std::string{"KvikIO failure at: "} + filename + ":" + std::to_string(line_number);

  if (user_msg.empty()) { throw Exception{location + ": (no message)"}; }
  throw Exception{location + ": " + user_msg};
}
272 
282 template <typename Exception>
283 void cuda_driver_try(CUresult error, int line_number, char const* filename)
284 {
285  if (error == CUDA_ERROR_STUB_LIBRARY) {
286  throw Exception{std::string{"CUDA error at: "} + std::string(filename) + ":" +
287  std::to_string(line_number) +
288  ": CUDA_ERROR_STUB_LIBRARY("
289  "The CUDA driver loaded is a stub library)"};
290  }
291  if (error != CUDA_SUCCESS) {
292  char const* err_name = nullptr;
293  char const* err_str = nullptr;
294  CUresult err_name_status = cudaAPI::instance().GetErrorName(error, &err_name);
295  CUresult err_str_status = cudaAPI::instance().GetErrorString(error, &err_str);
296  if (err_name_status == CUDA_ERROR_INVALID_VALUE) { err_name = "unknown"; }
297  if (err_str_status == CUDA_ERROR_INVALID_VALUE) { err_str = "unknown"; }
298  throw Exception{std::string{"CUDA error at: "} + filename + ":" + std::to_string(line_number) +
299  ": " + std::string(err_name) + "(" + std::string(err_str) + ")"};
300  }
301 }
302 
314 template <typename Exception>
315 void cufile_try(CUfileError_t error, int line_number, char const* filename)
316 {
317  if (error.err != CU_FILE_SUCCESS) {
318  if (error.err == CU_FILE_CUDA_DRIVER_ERROR) {
319  cuda_driver_try<Exception>(error.cu_err, line_number, filename);
320  }
321  throw Exception{std::string{"cuFile error at: "} + filename + ":" +
322  std::to_string(line_number) + ": " + cufileop_status_error(error.err)};
323  }
324 }
325 
338 template <typename Exception>
339 void cufile_check_bytes_done(ssize_t nbytes_done, int line_number, char const* filename)
340 {
341  if (nbytes_done < 0) {
342  auto const err = static_cast<long>(-nbytes_done);
343  auto const msg = (err > CUFILEOP_BASE_ERR)
344  ? std::string(cufileop_status_error(static_cast<CUfileOpError>(err)))
345  : std::string(std::strerror(err));
346  throw Exception{std::string{"cuFile error at: "} + filename + ":" +
347  std::to_string(line_number) + ": " + msg};
348  }
349 }
350 
361 [[noreturn]] inline void handle_linux_call_error(int line_number,
362  char const* filename,
363  std::string_view extra_msg)
364 {
365  auto msg = extra_msg.empty() ? std::string{} : (std::string{extra_msg} + " ");
366  msg += std::string{"Linux system/library function call error at: "} + filename + ":" +
367  std::to_string(line_number);
368  // std::system_error::what() automatically contains the detailed error description
369  // equivalent to calling strerror(errno)
370  throw kvikio::GenericSystemError(msg);
371 }
372 
386 inline void check_linux_call(long return_value,
387  int line_number,
388  char const* filename,
389  std::string_view extra_msg = "",
390  long error_value = -1)
391 {
392  if (return_value == error_value) { handle_linux_call_error(line_number, filename, extra_msg); }
393 }
394 
408 template <typename T>
409 void check_linux_call(
410  T return_value, int line_number, char const* filename, std::string_view extra_msg, T error_value)
411 {
412  if (return_value == error_value) { handle_linux_call_error(line_number, filename, extra_msg); }
413 }
414 
415 } // namespace detail
416 
417 } // namespace kvikio
KvikIO namespace.
Definition: batch.hpp:16