error.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/detail/utilities/stacktrace.hpp>
20 
21 #include <cuda.h>
22 #include <cuda_runtime_api.h>
23 
24 #include <stdexcept>
25 #include <string>
26 #include <type_traits>
27 
28 namespace cudf {
40  // Exclude the current stackframe, as it is this constructor.
41  : _stacktrace{cudf::detail::get_stacktrace(cudf::detail::capture_last_stackframe::NO)}
42  {
43  }
44 
45  public:
51  [[nodiscard]] char const* stacktrace() const { return _stacktrace.c_str(); }
52 
53  protected:
54  std::string const _stacktrace;
55 };
56 
63 struct logic_error : public std::logic_error, public stacktrace_recorder {
69  logic_error(char const* const message) : std::logic_error(message) {}
70 
76  logic_error(std::string const& message) : std::logic_error(message) {}
77 
78  // TODO Add an error code member? This would be useful for translating an
79  // exception to an error code in a pure-C API
80 
81  ~logic_error() override
82  {
83  // Needed so that the first instance of the implicit destructor for any TU isn't 'constructed'
84  // from a host+device function marking the implicit version also as host+device
85  }
86 };
91 struct cuda_error : public std::runtime_error, public stacktrace_recorder {
98  cuda_error(std::string const& message, cudaError_t const& error)
99  : std::runtime_error(message), _cudaError(error)
100  {
101  }
102 
103  public:
109  [[nodiscard]] cudaError_t error_code() const { return _cudaError; }
110 
111  protected:
112  cudaError_t _cudaError;
113 };
114 
115 struct fatal_cuda_error : public cuda_error {
116  using cuda_error::cuda_error; // Inherit constructors
117 };
118 
126 struct data_type_error : public std::invalid_argument, public stacktrace_recorder {
132  data_type_error(char const* const message) : std::invalid_argument(message) {}
133 
139  data_type_error(std::string const& message) : std::invalid_argument(message) {}
140 };
143 } // namespace cudf
144 
// Turns its argument into a string literal exactly as written (no macro expansion).
#define STRINGIFY_DETAIL(token) #token
// Two-step stringification: the argument is macro-expanded first and only then turned into a
// string literal, so CUDF_STRINGIFY(__LINE__) yields the line number rather than "__LINE__".
#define CUDF_STRINGIFY(token) STRINGIFY_DETAIL(token)
147 
// Selects the 4th argument: CUDF_EXPECTS_3 when three arguments were passed to CUDF_EXPECTS,
// CUDF_EXPECTS_2 when two were passed.
#define GET_CUDF_EXPECTS_MACRO(_1, _2, _3, NAME, ...) NAME

// Three-argument form. Verifies at compile time that `_exception_type` derives from
// std::exception, then throws it when `_condition` converts to false. `_reason` must be a string
// literal because it is concatenated with the file/line prefix at compile time.
#define CUDF_EXPECTS_3(_condition, _reason, _exception_type)                      \
  do {                                                                            \
    static_assert(std::is_base_of_v<std::exception, _exception_type>);            \
    if (!static_cast<bool>(_condition)) {                                         \
      throw _exception_type /*NOLINT(bugprone-macro-parentheses)*/                \
        {"CUDF failure at: " __FILE__ ":" CUDF_STRINGIFY(__LINE__) ": " _reason}; \
    }                                                                             \
  } while (0)

// Two-argument form: same as above with cudf::logic_error as the exception type.
#define CUDF_EXPECTS_2(_condition, _reason) CUDF_EXPECTS_3(_condition, _reason, cudf::logic_error)

/**
 * @brief Macro for checking (pre-)conditions that throws an exception when a condition is
 * violated.
 *
 * Usage:
 *  - `CUDF_EXPECTS(condition, "reason");` throws `cudf::logic_error`
 *  - `CUDF_EXPECTS(condition, "reason", exception_type);` throws `exception_type`
 *
 * The thrown message has the form `CUDF failure at: <file>:<line>: <reason>`.
 */
#define CUDF_EXPECTS(...)                                             \
  GET_CUDF_EXPECTS_MACRO(__VA_ARGS__, CUDF_EXPECTS_3, CUDF_EXPECTS_2) \
  (__VA_ARGS__)
194 
196 
/**
 * @brief Indicates that an erroneous code path has been taken.
 *
 * Usage:
 *  - `CUDF_FAIL("reason");` throws `cudf::logic_error`
 *  - `CUDF_FAIL("reason", exception_type);` throws `exception_type`
 *
 * The thrown message has the form `CUDF failure at: <file>:<line>: <reason>`, the same layout
 * that CUDF_EXPECTS produces. The reason must be a string literal because it is concatenated
 * with the file/line prefix at compile time.
 */
#define CUDF_FAIL(...)                                       \
  GET_CUDF_FAIL_MACRO(__VA_ARGS__, CUDF_FAIL_2, CUDF_FAIL_1) \
  (__VA_ARGS__)

// Selects the 3rd argument: CUDF_FAIL_2 when two arguments were passed to CUDF_FAIL,
// CUDF_FAIL_1 when one was passed.
#define GET_CUDF_FAIL_MACRO(_1, _2, NAME, ...) NAME

// Two-argument form: throws `_exception_type` unconditionally. Note the space after
// "CUDF failure at:" so the message matches the prefix emitted by CUDF_EXPECTS.
#define CUDF_FAIL_2(_what, _exception_type)      \
  /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/ \
  throw _exception_type { "CUDF failure at: " __FILE__ ":" CUDF_STRINGIFY(__LINE__) ": " _what }

// One-argument form: same as above with cudf::logic_error as the exception type.
#define CUDF_FAIL_1(_what) CUDF_FAIL_2(_what, cudf::logic_error)
229 
231 
232 namespace cudf {
233 namespace detail {
234 // @cond
235 inline void throw_cuda_error(cudaError_t error, char const* file, unsigned int line)
236 {
237  // Calls cudaGetLastError to clear the error status. It is nearly certain that a fatal error
238  // occurred if it still returns the same error after a cleanup.
239  cudaGetLastError();
240  auto const last = cudaFree(nullptr);
241  auto const msg = std::string{"CUDA error encountered at: " + std::string{file} + ":" +
242  std::to_string(line) + ": " + std::to_string(error) + " " +
243  cudaGetErrorName(error) + " " + cudaGetErrorString(error)};
244  // Call cudaDeviceSynchronize to ensure `last` did not result from an asynchronous error.
245  // between two calls.
246  if (error == last && last == cudaDeviceSynchronize()) {
247  throw fatal_cuda_error{"Fatal " + msg, error};
248  } else {
249  throw cuda_error{msg, error};
250  }
251 }
252 // @endcond
253 } // namespace detail
254 } // namespace cudf
255 
/**
 * @brief Error checking macro for CUDA runtime API functions.
 *
 * Evaluates `call` exactly once. If the result is not `cudaSuccess`, invokes
 * `cudf::detail::throw_cuda_error` with the status and the current file and line, which throws.
 *
 * The expansion intentionally does NOT end in a semicolon: the caller's own `;` terminates the
 * do/while(0) statement, so the macro behaves as a single statement and can be used in an
 * unbraced `if (...) CUDF_CUDA_TRY(...); else ...`.
 */
#define CUDF_CUDA_TRY(call)                                                                    \
  do {                                                                                         \
    cudaError_t const status = (call);                                                         \
    if (cudaSuccess != status) { cudf::detail::throw_cuda_error(status, __FILE__, __LINE__); } \
  } while (0)
268 
/**
 * @brief Debug macro to check for CUDA errors.
 *
 * When `NDEBUG` is not defined, synchronizes `stream` and then checks the last CUDA error.
 * When `NDEBUG` is defined, only checks the last CUDA error without synchronizing; `stream` is
 * not evaluated in that case.
 *
 * Both variants expand to a single do/while(0) statement WITHOUT a trailing semicolon, so the
 * caller's `;` completes the statement and the macro is safe in an unbraced `if`/`else`.
 */
#ifndef NDEBUG
#define CUDF_CHECK_CUDA(stream)                   \
  do {                                            \
    CUDF_CUDA_TRY(cudaStreamSynchronize(stream)); \
    CUDF_CUDA_TRY(cudaPeekAtLastError());         \
  } while (0)
#else
#define CUDF_CHECK_CUDA(stream)           \
  do {                                    \
    CUDF_CUDA_TRY(cudaPeekAtLastError()); \
  } while (0)
#endif
cuDF interfaces
Definition: aggregation.hpp:34
Exception thrown when a CUDA error is encountered.
Definition: error.hpp:91
cuda_error(std::string const &message, cudaError_t const &error)
Construct a new cuda error object with error message and code.
Definition: error.hpp:98
cudaError_t _cudaError
CUDA error code.
Definition: error.hpp:112
cudaError_t error_code() const
Returns the CUDA error code associated with the exception.
Definition: error.hpp:109
Exception thrown when an operation is attempted on an unsupported dtype.
Definition: error.hpp:126
data_type_error(std::string const &message)
Construct a new data_type_error object with error message.
Definition: error.hpp:139
data_type_error(char const *const message)
Constructs a data_type_error with the error message.
Definition: error.hpp:132
Exception thrown when logical precondition is violated.
Definition: error.hpp:63
logic_error(char const *const message)
Constructs a logic_error with the error message.
Definition: error.hpp:69
logic_error(std::string const &message)
Construct a new logic error object with error message.
Definition: error.hpp:76
The struct to store the current stacktrace upon its construction.
Definition: error.hpp:38
std::string const _stacktrace
The whole stacktrace stored as one string.
Definition: error.hpp:54
char const * stacktrace() const
Get the stored stacktrace captured during object construction.
Definition: error.hpp:51