error.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/detail/utilities/stacktrace.hpp>
20 #include <cudf/utilities/export.hpp>
21 
22 #include <cuda.h>
23 #include <cuda_runtime_api.h>
24 
25 #include <stdexcept>
26 #include <string>
27 #include <type_traits>
28 
29 namespace CUDF_EXPORT cudf {
41  // Exclude the current stackframe, as it is this constructor.
42  : _stacktrace{cudf::detail::get_stacktrace(cudf::detail::capture_last_stackframe::NO)}
43  {
44  }
45 
46  public:
52  [[nodiscard]] char const* stacktrace() const { return _stacktrace.c_str(); }
53 
54  protected:
55  std::string const _stacktrace;
56 };
57 
64 struct logic_error : public std::logic_error, public stacktrace_recorder {
70  logic_error(char const* const message) : std::logic_error(message) {}
71 
77  logic_error(std::string const& message) : std::logic_error(message) {}
78 
79  // TODO Add an error code member? This would be useful for translating an
80  // exception to an error code in a pure-C API
81 
82  ~logic_error() override
83  {
84  // Needed so that the first instance of the implicit destructor for any TU isn't 'constructed'
85  // from a host+device function marking the implicit version also as host+device
86  }
87 };
92 struct cuda_error : public std::runtime_error, public stacktrace_recorder {
99  cuda_error(std::string const& message, cudaError_t const& error)
100  : std::runtime_error(message), _cudaError(error)
101  {
102  }
103 
104  public:
110  [[nodiscard]] cudaError_t error_code() const { return _cudaError; }
111 
112  protected:
113  cudaError_t _cudaError;
114 };
115 
116 struct fatal_cuda_error : public cuda_error {
117  using cuda_error::cuda_error; // Inherit constructors
118 };
119 
127 struct data_type_error : public std::invalid_argument, public stacktrace_recorder {
133  data_type_error(char const* const message) : std::invalid_argument(message) {}
134 
140  data_type_error(std::string const& message) : std::invalid_argument(message) {}
141 };
144 } // namespace CUDF_EXPORT cudf
145 
// Stringifies its argument exactly as written (no macro expansion of the argument).
#define STRINGIFY_DETAIL(token) #token
// Expands the argument first, then stringifies the result (e.g. __LINE__ -> "123").
#define CUDF_STRINGIFY(token) STRINGIFY_DETAIL(token)
148 
// Overload-selection helper: yields its fourth argument, which is CUDF_EXPECTS_3
// when three arguments were supplied and CUDF_EXPECTS_2 when two were supplied.
#define GET_CUDF_EXPECTS_MACRO(arg1_, arg2_, arg3_, SELECTED, ...) SELECTED

// Three-argument form: checks `cond_` and, when it is false, throws `exception_t_`
// constructed from the location-prefixed message. `reason_` must be a string
// literal because it is pasted onto the prefix by literal concatenation.
#define CUDF_EXPECTS_3(cond_, reason_, exception_t_)                                      \
  do {                                                                                    \
    static_assert(std::is_base_of_v<std::exception, exception_t_>);                       \
    (cond_) ? static_cast<void>(0)                                                        \
            : throw exception_t_ /*NOLINT(bugprone-macro-parentheses)*/                   \
                {"CUDF failure at: " __FILE__ ":" CUDF_STRINGIFY(__LINE__) ": " reason_}; \
  } while (0)

// Two-argument form: same check, throwing cudf::logic_error.
#define CUDF_EXPECTS_2(cond_, reason_) CUDF_EXPECTS_3(cond_, reason_, cudf::logic_error)

/**
 * @brief Macro for checking (pre-)conditions that throws an exception when
 * a condition is violated.
 *
 * Usage:
 *   CUDF_EXPECTS(cond, "reason");                  // throws cudf::logic_error
 *   CUDF_EXPECTS(cond, "reason", exception_type);  // throws exception_type
 *
 * The exception type must derive from `std::exception` (enforced by a
 * `static_assert`). The thrown message has the form
 * "CUDF failure at: <file>:<line>: <reason>".
 */
#define CUDF_EXPECTS(...)                                             \
  GET_CUDF_EXPECTS_MACRO(__VA_ARGS__, CUDF_EXPECTS_3, CUDF_EXPECTS_2) \
  (__VA_ARGS__)
195 
197 
/**
 * @brief Indicates that an erroneous code path has been taken.
 *
 * Usage:
 *   CUDF_FAIL("reason");                  // throws cudf::logic_error
 *   CUDF_FAIL("reason", exception_type);  // throws exception_type
 *
 * The thrown message has the form "CUDF failure at: <file>:<line>: <reason>",
 * the same layout produced by CUDF_EXPECTS. The reason must be a string literal
 * because it is pasted onto the location prefix by literal concatenation.
 */
#define CUDF_FAIL(...)                                       \
  GET_CUDF_FAIL_MACRO(__VA_ARGS__, CUDF_FAIL_2, CUDF_FAIL_1) \
  (__VA_ARGS__)

// Overload-selection helper: yields its third argument, which is CUDF_FAIL_2 when
// two arguments were supplied and CUDF_FAIL_1 when one was supplied.
#define GET_CUDF_FAIL_MACRO(_1, _2, NAME, ...) NAME

// Two-argument form. Note the space after "at:" so the file name is separated from
// the prefix, matching the message emitted by CUDF_EXPECTS.
#define CUDF_FAIL_2(_what, _exception_type)      \
  /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/ \
  throw _exception_type { "CUDF failure at: " __FILE__ ":" CUDF_STRINGIFY(__LINE__) ": " _what }

// One-argument form: throws cudf::logic_error.
#define CUDF_FAIL_1(_what) CUDF_FAIL_2(_what, cudf::logic_error)
230 
232 
233 namespace CUDF_EXPORT cudf {
234 namespace detail {
235 // @cond
236 inline void throw_cuda_error(cudaError_t error, char const* file, unsigned int line)
237 {
238  // Calls cudaGetLastError to clear the error status. It is nearly certain that a fatal error
239  // occurred if it still returns the same error after a cleanup.
240  cudaGetLastError();
241  auto const last = cudaFree(nullptr);
242  auto const msg = std::string{"CUDA error encountered at: " + std::string{file} + ":" +
243  std::to_string(line) + ": " + std::to_string(error) + " " +
244  cudaGetErrorName(error) + " " + cudaGetErrorString(error)};
245  // Call cudaDeviceSynchronize to ensure `last` did not result from an asynchronous error.
246  // between two calls.
247  if (error == last && last == cudaDeviceSynchronize()) {
248  throw fatal_cuda_error{"Fatal " + msg, error};
249  } else {
250  throw cuda_error{msg, error};
251  }
252 }
253 // @endcond
254 } // namespace detail
255 } // namespace CUDF_EXPORT cudf
256 
/**
 * @brief Error checking macro for CUDA runtime API functions.
 *
 * Evaluates `call` exactly once. If the result is anything other than
 * `cudaSuccess`, invokes `cudf::detail::throw_cuda_error` with the result and the
 * current file and line, which throws a `cudf::cuda_error` (or a
 * `cudf::fatal_cuda_error`).
 *
 * The temporary holding the result uses a macro-specific name so that a `call`
 * expression mentioning a caller variable named `status` still refers to the
 * caller's variable rather than to this macro's own (not yet initialized) local.
 *
 * NOTE(review): the expansion ends in `while (0);` with its own semicolon, so
 * `if (c) CUDF_CUDA_TRY(x); else ...` does not compile. The semicolon is kept here
 * for backward compatibility with any caller that omits it; consider removing it
 * once all call sites are confirmed to supply their own.
 *
 * @param call Expression yielding a `cudaError_t`
 */
#define CUDF_CUDA_TRY(call)                                                         \
  do {                                                                              \
    cudaError_t const cudf_cuda_try_status_ = (call);                               \
    if (cudaSuccess != cudf_cuda_try_status_) {                                     \
      cudf::detail::throw_cuda_error(cudf_cuda_try_status_, __FILE__, __LINE__);    \
    }                                                                               \
  } while (0);
269 
/**
 * @brief Debug macro to check for CUDA errors.
 *
 * When `NDEBUG` is defined, only `cudaPeekAtLastError()` is checked and the
 * `stream` argument is not used. Otherwise the given stream is synchronized
 * first (and that result checked) before `cudaPeekAtLastError()` is checked.
 * Both checks go through `CUDF_CUDA_TRY`.
 *
 * @param stream Stream to synchronize when `NDEBUG` is not defined
 */
#ifdef NDEBUG
#define CUDF_CHECK_CUDA(stream) CUDF_CUDA_TRY(cudaPeekAtLastError());
#else
#define CUDF_CHECK_CUDA(stream)                   \
  do {                                            \
    CUDF_CUDA_TRY(cudaStreamSynchronize(stream)); \
    CUDF_CUDA_TRY(cudaPeekAtLastError());         \
  } while (0);
#endif
cuDF interfaces
Definition: aggregation.hpp:35
Exception thrown when a CUDA error is encountered.
Definition: error.hpp:92
cuda_error(std::string const &message, cudaError_t const &error)
Construct a new cuda error object with error message and code.
Definition: error.hpp:99
cudaError_t _cudaError
CUDA error code.
Definition: error.hpp:113
cudaError_t error_code() const
Returns the CUDA error code associated with the exception.
Definition: error.hpp:110
Exception thrown when an operation is attempted on an unsupported dtype.
Definition: error.hpp:127
data_type_error(std::string const &message)
Construct a new data_type_error object with error message.
Definition: error.hpp:140
data_type_error(char const *const message)
Constructs a data_type_error with the error message.
Definition: error.hpp:133
Exception thrown when logical precondition is violated.
Definition: error.hpp:64
logic_error(char const *const message)
Constructs a logic_error with the error message.
Definition: error.hpp:70
logic_error(std::string const &message)
Construct a new logic error object with error message.
Definition: error.hpp:77
The struct to store the current stacktrace upon its construction.
Definition: error.hpp:39
std::string const _stacktrace
The whole stacktrace stored as one string.
Definition: error.hpp:55
char const * stacktrace() const
Get the stored stacktrace captured during object construction.
Definition: error.hpp:52