types.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
#ifdef __CUDACC__
/**
 * @brief Indicates that the function or method is usable on host and device.
 *
 * When compiling with a CUDA compiler (`__CUDACC__` defined) this expands to
 * `__host__ __device__`.
 */
#define CUDF_HOST_DEVICE __host__ __device__
/**
 * @brief Prefix for kernel definitions.
 *
 * When compiling with a CUDA compiler this expands to `__global__ static`.
 */
#define CUDF_KERNEL __global__ static
#else
/**
 * @brief Indicates that the function or method is usable on host and device.
 *
 * Expands to nothing when not compiling with a CUDA compiler, so annotated
 * declarations remain valid host-only C++.
 */
#define CUDF_HOST_DEVICE
/**
 * @brief Prefix for kernel definitions.
 *
 * Expands to plain `static` when not compiling with a CUDA compiler.
 */
#define CUDF_KERNEL static
#endif
27 
28 #include <cudf/utilities/export.hpp>
29 
30 #include <cassert>
31 #include <cstddef>
32 #include <cstdint>
33 #include <iterator>
34 
40 // Forward declarations
namespace rmm {
// Forward declaration only: this header names the type without including its definition.
class device_buffer;

}  // namespace rmm
47 
48 namespace CUDF_EXPORT cudf {
// Forward declarations of column, view and scalar types so that this header can
// name them without including their definitions.
class column;
class column_view;
class mutable_column_view;
class string_view;
class list_view;
class struct_view;
class scalar;

// clang-format off
// Host-side scalar types.
class list_scalar;
class struct_scalar;
class string_scalar;
template <typename T> class numeric_scalar;
template <typename T> class fixed_point_scalar;
template <typename T> class timestamp_scalar;
template <typename T> class duration_scalar;

// Device-view counterparts of the scalar types.
class string_scalar_device_view;
template <typename T> class numeric_scalar_device_view;
template <typename T> class fixed_point_scalar_device_view;
template <typename T> class timestamp_scalar_device_view;
template <typename T> class duration_scalar_device_view;
// clang-format on

// Table types.
class table;
class table_view;
class mutable_table_view;
77 
using size_type         = int32_t;   ///< Row index type for columns and tables
using bitmask_type      = uint32_t;  ///< Bitmask type stored as 32-bit unsigned integer
using valid_type        = uint8_t;   ///< Valid type in host memory
using thread_index_type = int64_t;   ///< Thread index type in kernels
using char_utf8         = uint32_t;  ///< UTF-8 characters are 1-4 bytes
89 
98 template <typename T>
100 {
101  return static_cast<size_type>(std::distance(f, l));
102 }
103 
/**
 * @brief Indicates the order in which elements should be sorted.
 */
enum class order : bool {
  ASCENDING,  ///< First enumerator (underlying value `false`)
  DESCENDING  ///< Second enumerator (underlying value `true`)
};
111 
/**
 * @brief Enum to specify whether to include nulls or exclude nulls.
 */
enum class null_policy : bool {
  EXCLUDE,  ///< exclude null elements
  INCLUDE   ///< include null elements
};
119 
/**
 * @brief Enum to treat NaN floating point value as null or non-null element.
 */
enum class nan_policy : bool {
  NAN_IS_NULL,  ///< treat nans as null elements
  NAN_IS_VALID  ///< treat nans as valid elements (non-null)
};
127 
/**
 * @brief Enum to consider different elements (of floating point types) holding NaN value as equal
 * or unequal.
 */
enum class nan_equality /*unspecified*/ {
  ALL_EQUAL,  ///< All NaNs compare equal, regardless of sign
  UNEQUAL     ///< All NaNs compare unequal (IEEE754 behavior)
};
136 
/**
 * @brief Enum to consider two nulls as equal or unequal.
 */
enum class null_equality : bool {
  EQUAL,   ///< First enumerator (underlying value `false`)
  UNEQUAL  ///< Second enumerator (underlying value `true`)
};
144 
/**
 * @brief Indicates how null values compare against all other values.
 */
enum class null_order : bool {
  AFTER,  ///< NULL values ordered *after* all other values
  BEFORE  ///< NULL values ordered *before* all other values
};
152 
/**
 * @brief Indicates whether a collection of values is known to be sorted.
 */
enum class sorted : bool {
  NO,
  YES
};
157 
161 struct order_info {
165 };
166 
/**
 * @brief Controls the allocation/initialization of a null mask.
 */
enum class mask_state : int32_t {
  UNALLOCATED,    ///< Null mask not allocated (all elements are valid)
  UNINITIALIZED,  ///< Null mask allocated, but not initialized
  ALL_VALID,      ///< Null mask allocated, initialized to all elements valid
  ALL_NULL        ///< Null mask allocated, initialized to all elements NULL
};
176 
/**
 * @brief Interpolation method to use when the desired quantile lies between
 * two data points i and j.
 */
enum class interpolation : int32_t {
  LINEAR,    ///< Linear interpolation between i and j
  LOWER,     ///< Lower data point (i)
  HIGHER,    ///< Higher data point (j)
  MIDPOINT,  // NOTE(review): no description available here; presumably the midpoint of i and j
  NEAREST    ///< i or j, whichever is nearest
};
188 
/**
 * @brief Identifies a column's logical element type.
 *
 * Enumerator values are assigned implicitly, so the declaration order is
 * significant: inserting or removing an entry shifts every later value and
 * `NUM_TYPE_IDS`.
 */
enum class type_id : int32_t {
  EMPTY,                   ///< Always null with no underlying data
  INT8,                    ///< 1 byte signed integer
  INT16,                   ///< 2 byte signed integer
  INT32,                   ///< 4 byte signed integer
  INT64,                   ///< 8 byte signed integer
  UINT8,                   ///< 1 byte unsigned integer
  UINT16,                  ///< 2 byte unsigned integer
  UINT32,                  ///< 4 byte unsigned integer
  UINT64,                  ///< 8 byte unsigned integer
  FLOAT32,                 ///< 4 byte floating point
  FLOAT64,                 ///< 8 byte floating point
  BOOL8,                   ///< Boolean using one byte per value, 0 == false, else true
  TIMESTAMP_DAYS,          ///< point in time in days since Unix Epoch in int32
  TIMESTAMP_SECONDS,       ///< point in time in seconds since Unix Epoch in int64
  TIMESTAMP_MILLISECONDS,  ///< point in time in milliseconds since Unix Epoch in int64
  TIMESTAMP_MICROSECONDS,  ///< point in time in microseconds since Unix Epoch in int64
  TIMESTAMP_NANOSECONDS,   ///< point in time in nanoseconds since Unix Epoch in int64
  DURATION_DAYS,           ///< time interval of days in int32
  DURATION_SECONDS,        ///< time interval of seconds in int64
  DURATION_MILLISECONDS,   ///< time interval of milliseconds in int64
  DURATION_MICROSECONDS,   ///< time interval of microseconds in int64
  DURATION_NANOSECONDS,    ///< time interval of nanoseconds in int64
  DICTIONARY32,            ///< Dictionary type using int32 indices
  STRING,                  ///< String elements
  LIST,                    ///< List elements
  DECIMAL32,               ///< Fixed-point type with int32_t
  DECIMAL64,               ///< Fixed-point type with int64_t
  DECIMAL128,              ///< Fixed-point type with __int128_t
  STRUCT,                  ///< Struct elements
  // `NUM_TYPE_IDS` must be last!
  NUM_TYPE_IDS  ///< Total number of type ids
};
225 
/**
 * @brief Indicates whether a function is null-aware or not.
 */
enum class null_aware : bool {
  NO  = 0,  ///< Underlying value `false`
  YES = 1   ///< Underlying value `true`
};
231 
238 class data_type {
239  public:
240  data_type() = default;
241  ~data_type() = default;
242  data_type(data_type const&) = default;
243  data_type(data_type&&) = default;
244 
250  data_type& operator=(data_type const&) = default;
251 
258 
264  CUDF_HOST_DEVICE explicit constexpr data_type(type_id id) : _id{id} {}
265 
272  explicit data_type(type_id id, int32_t scale) : _id{id}, _fixed_point_scale{scale}
273  {
274  assert(id == type_id::DECIMAL32 || id == type_id::DECIMAL64 || id == type_id::DECIMAL128);
275  }
276 
282  [[nodiscard]] CUDF_HOST_DEVICE constexpr type_id id() const noexcept { return _id; }
283 
289  [[nodiscard]] CUDF_HOST_DEVICE constexpr int32_t scale() const noexcept
290  {
291  return _fixed_point_scale;
292  }
293 
294  private:
295  type_id _id{type_id::EMPTY};
296 
297  // Below is additional type specific metadata. Currently, only _fixed_point_scale is stored.
298 
299  int32_t _fixed_point_scale{}; // numeric::scale_type not available here, use int32_t
300 };
301 
314 constexpr bool operator==(data_type const& lhs, data_type const& rhs)
315 {
316  // use std::tie in the future, breaks JITIFY currently
317  return lhs.id() == rhs.id() && lhs.scale() == rhs.scale();
318 }
319 
332 inline bool operator!=(data_type const& lhs, data_type const& rhs) { return !(lhs == rhs); }
333 
344 std::size_t size_of(data_type t);
345 
347 } // namespace CUDF_EXPORT cudf
Indicator for the logical data type of an element in a column.
Definition: types.hpp:238
data_type & operator=(data_type &&)=default
Move assignment operator for data_type.
data_type(data_type &&)=default
Move constructor.
constexpr CUDF_HOST_DEVICE type_id id() const noexcept
Returns the type identifier.
Definition: types.hpp:282
data_type(type_id id, int32_t scale)
Construct a new data_type object for numeric::fixed_point
Definition: types.hpp:272
data_type & operator=(data_type const &)=default
Copy assignment operator for data_type.
data_type(data_type const &)=default
Copy constructor.
constexpr CUDF_HOST_DEVICE data_type(type_id id)
Construct a new data_type object.
Definition: types.hpp:264
constexpr CUDF_HOST_DEVICE int32_t scale() const noexcept
Returns the scale (for fixed_point types)
Definition: types.hpp:289
@ LOWER
Lower data point (i)
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:148
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:140
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
null_policy
Enum to specify whether to include nulls or exclude nulls.
Definition: types.hpp:115
uint32_t bitmask_type
Bitmask type stored as 32-bit unsigned integer.
Definition: types.hpp:85
size_type distance(T f, T l)
Similar to std::distance but returns cudf::size_type and performs static_cast
Definition: types.hpp:99
constexpr bool operator==(data_type const &lhs, data_type const &rhs)
Compares two data_type objects for equality.
Definition: types.hpp:314
null_aware
Indicates whether a function is null-aware or not.
Definition: types.hpp:227
mask_state
Controls the allocation/initialization of a null mask.
Definition: types.hpp:170
std::size_t size_of(data_type t)
Returns the size in bytes of elements of the specified data_type
int64_t thread_index_type
Thread index type in kernels.
Definition: types.hpp:87
nan_policy
Enum to treat NaN floating point value as null or non-null element.
Definition: types.hpp:123
order
Indicates the order in which elements should be sorted.
Definition: types.hpp:107
bool operator!=(data_type const &lhs, data_type const &rhs)
Compares two data_type objects for inequality.
Definition: types.hpp:332
uint8_t valid_type
Valid type in host memory.
Definition: types.hpp:86
interpolation
Interpolation method to use when the desired quantile lies between two data points i and j.
Definition: types.hpp:181
sorted
Indicates whether a collection of values is known to be sorted.
Definition: types.hpp:156
type_id
Identifies a column's logical element type.
Definition: types.hpp:192
nan_equality
Enum to consider different elements (of floating point types) holding NaN value as equal or unequal.
Definition: types.hpp:132
uint32_t char_utf8
UTF-8 characters are 1-4 bytes.
Definition: string_view.hpp:20
@ BEFORE
NULL values ordered before all other values.
@ AFTER
NULL values ordered after all other values.
@ INCLUDE
include null elements
@ EXCLUDE
exclude null elements
@ ALL_VALID
Null mask allocated, initialized to all elements valid.
@ UNALLOCATED
Null mask not allocated (all elements are valid)
@ ALL_NULL
Null mask allocated, initialized to all elements NULL.
@ UNINITIALIZED
Null mask allocated, but not initialized.
@ NAN_IS_VALID
treat nans as valid elements (non-null)
@ NAN_IS_NULL
treat nans as null elements
@ HIGHER
Higher data point (j)
@ LINEAR
Linear interpolation between i and j.
@ NEAREST
i or j, whichever is nearest
@ BOOL8
Boolean using one byte per value, 0 == false, else true.
@ FLOAT64
8 byte floating point
@ UINT32
4 byte unsigned integer
@ DURATION_MILLISECONDS
time interval of milliseconds in int64
@ NUM_TYPE_IDS
Total number of type ids.
@ UINT16
2 byte unsigned integer
@ DECIMAL128
Fixed-point type with __int128_t.
@ INT16
2 byte signed integer
@ TIMESTAMP_MILLISECONDS
point in time in milliseconds since Unix Epoch in int64
@ DURATION_NANOSECONDS
time interval of nanoseconds in int64
@ DURATION_DAYS
time interval of days in int32
@ UINT64
8 byte unsigned integer
@ TIMESTAMP_MICROSECONDS
point in time in microseconds since Unix Epoch in int64
@ DURATION_SECONDS
time interval of seconds in int64
@ DURATION_MICROSECONDS
time interval of microseconds in int64
@ FLOAT32
4 byte floating point
@ EMPTY
Always null with no underlying data.
@ TIMESTAMP_SECONDS
point in time in seconds since Unix Epoch in int64
@ TIMESTAMP_NANOSECONDS
point in time in nanoseconds since Unix Epoch in int64
@ TIMESTAMP_DAYS
point in time in days since Unix Epoch in int32
@ DECIMAL64
Fixed-point type with int64_t.
@ DECIMAL32
Fixed-point type with int32_t.
@ UINT8
1 byte unsigned integer
@ INT8
1 byte signed integer
@ DICTIONARY32
Dictionary type using int32 indices.
@ UNEQUAL
All NaNs compare unequal (IEEE754 behavior)
@ ALL_EQUAL
All NaNs compare equal, regardless of sign.
cuDF interfaces
Definition: host_udf.hpp:26
Indicates how a collection of values has been ordered.
Definition: types.hpp:161
order ordering
Indicates the order in which the values are sorted.
Definition: types.hpp:163
null_order null_ordering
Indicates how null values compare against all other values.
Definition: types.hpp:164
sorted is_sorted
Indicates whether the collection is sorted.
Definition: types.hpp:162
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:21