types.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
// Function decorators: they expand to CUDA qualifiers only when this header is
// processed by a CUDA compiler, i.e. when __CUDACC__ is defined.
#if defined(__CUDACC__)
/// Decorates a function with both the `__host__` and `__device__` qualifiers.
#define CUDF_HOST_DEVICE __host__ __device__
/// Decorates a kernel: `__global__` plus `static`.
#define CUDF_KERNEL __global__ static
#else
/// No CUDA compiler: the host/device decorator expands to nothing.
#define CUDF_HOST_DEVICE
/// No CUDA compiler: only the `static` specifier is kept.
#define CUDF_KERNEL static
#endif  // defined(__CUDACC__)
38 
39 #include <cassert>
40 #include <cstddef>
41 #include <cstdint>
42 #include <iterator>
43 
49 // Forward declarations
51 namespace rmm {
52 class device_buffer;
54 
55 } // namespace rmm
56 
57 namespace cudf {
58 // Forward declaration
59 class column;
60 class column_view;
61 class mutable_column_view;
62 class string_view;
63 class list_view;
64 class struct_view;
65 class scalar;
66 
67 // clang-format off
68 class list_scalar;
69 class struct_scalar;
70 class string_scalar;
71 template <typename T> class numeric_scalar;
72 template <typename T> class fixed_point_scalar;
73 template <typename T> class timestamp_scalar;
74 template <typename T> class duration_scalar;
75 
76 class string_scalar_device_view;
77 template <typename T> class numeric_scalar_device_view;
78 template <typename T> class fixed_point_scalar_device_view;
79 template <typename T> class timestamp_scalar_device_view;
80 template <typename T> class duration_scalar_device_view;
81 // clang-format on
82 
83 class table;
84 class table_view;
85 class mutable_table_view;
86 
using size_type         = int32_t;   ///< Row index type for columns and tables
using bitmask_type      = uint32_t;  ///< Bitmask type stored as 32-bit unsigned integer
using valid_type        = uint8_t;   ///< Valid type in host memory
using thread_index_type = int64_t;   ///< Thread index type in kernels
using char_utf8         = uint32_t;  ///< UTF-8 characters are 1-4 bytes
98 
107 template <typename T>
109 {
110  return static_cast<size_type>(std::distance(f, l));
111 }
112 
/**
 * @brief Indicates the order in which elements should be sorted.
 *
 * - `ASCENDING`: elements ordered from small to large.
 * - `DESCENDING`: elements ordered from large to small.
 */
enum class order : bool { ASCENDING, DESCENDING };
120 
/**
 * @brief Enum to specify whether to include nulls or exclude nulls.
 *
 * - `EXCLUDE`: exclude null elements.
 * - `INCLUDE`: include null elements.
 */
enum class null_policy : bool { EXCLUDE, INCLUDE };
128 
/**
 * @brief Enum to treat NaN floating point value as null or non-null element.
 *
 * - `NAN_IS_NULL`: treat NaNs as null elements.
 * - `NAN_IS_VALID`: treat NaNs as valid (non-null) elements.
 */
enum class nan_policy : bool { NAN_IS_NULL, NAN_IS_VALID };
136 
/**
 * @brief Enum to consider different elements (of floating point types) holding NaN value as
 * equal or unequal.
 *
 * - `ALL_EQUAL`: all NaNs compare equal, regardless of sign.
 * - `UNEQUAL`: all NaNs compare unequal (IEEE754 behavior).
 */
enum class nan_equality /*unspecified*/ { ALL_EQUAL, UNEQUAL };
145 
/**
 * @brief Enum to consider two nulls as equal or unequal.
 *
 * - `EQUAL`: nulls compare equal.
 * - `UNEQUAL`: nulls compare unequal.
 */
enum class null_equality : bool { EQUAL, UNEQUAL };
153 
/**
 * @brief Indicates how null values compare against all other values.
 *
 * - `AFTER`: NULL values ordered after all other values.
 * - `BEFORE`: NULL values ordered before all other values.
 */
enum class null_order : bool { AFTER, BEFORE };
161 
/**
 * @brief Indicates whether a collection of values is known to be sorted.
 */
enum class sorted : bool {
  NO,
  YES
};
166 
170 struct order_info {
174 };
175 
/**
 * @brief Controls the allocation/initialization of a null mask.
 *
 * - `UNALLOCATED`: null mask not allocated (all elements are valid).
 * - `UNINITIALIZED`: null mask allocated, but not initialized.
 * - `ALL_VALID`: null mask allocated, initialized to all elements valid.
 * - `ALL_NULL`: null mask allocated, initialized to all elements NULL.
 */
enum class mask_state : int32_t { UNALLOCATED, UNINITIALIZED, ALL_VALID, ALL_NULL };
185 
/**
 * @brief Interpolation method to use when the desired quantile lies between
 * two data points i and j.
 *
 * - `LINEAR`: linear interpolation between i and j.
 * - `LOWER`: lower data point (i).
 * - `HIGHER`: higher data point (j).
 * - `MIDPOINT`: presumably the midpoint of i and j -- TODO confirm against the original docs.
 * - `NEAREST`: i or j, whichever is nearest.
 */
enum class interpolation : int32_t { LINEAR, LOWER, HIGHER, MIDPOINT, NEAREST };
197 
/**
 * @brief Identifies a column's logical element type.
 *
 * Enumerator values are assigned implicitly, so the declaration order is significant:
 * inserting or removing an entry shifts the value of every enumerator after it.
 */
enum class type_id : int32_t {
  EMPTY,                   ///< Always null with no underlying data
  INT8,                    ///< 1 byte signed integer
  INT16,                   ///< 2 byte signed integer
  INT32,                   ///< 4 byte signed integer
  INT64,                   ///< 8 byte signed integer
  UINT8,                   ///< 1 byte unsigned integer
  UINT16,                  ///< 2 byte unsigned integer
  UINT32,                  ///< 4 byte unsigned integer
  UINT64,                  ///< 8 byte unsigned integer
  FLOAT32,                 ///< 4 byte floating point
  FLOAT64,                 ///< 8 byte floating point
  BOOL8,                   ///< Boolean using one byte per value, 0 == false, else true
  TIMESTAMP_DAYS,          ///< point in time in days since Unix Epoch in int32
  TIMESTAMP_SECONDS,       ///< point in time in seconds since Unix Epoch in int64
  TIMESTAMP_MILLISECONDS,  ///< point in time in milliseconds since Unix Epoch in int64
  TIMESTAMP_MICROSECONDS,  ///< point in time in microseconds since Unix Epoch in int64
  TIMESTAMP_NANOSECONDS,   ///< point in time in nanoseconds since Unix Epoch in int64
  DURATION_DAYS,           ///< time interval of days in int32
  DURATION_SECONDS,        ///< time interval of seconds in int64
  DURATION_MILLISECONDS,   ///< time interval of milliseconds in int64
  DURATION_MICROSECONDS,   ///< time interval of microseconds in int64
  DURATION_NANOSECONDS,    ///< time interval of nanoseconds in int64
  DICTIONARY32,            ///< Dictionary type using int32 indices
  STRING,                  ///< String elements
  LIST,                    ///< List elements
  DECIMAL32,               ///< Fixed-point type with int32_t
  DECIMAL64,               ///< Fixed-point type with int64_t
  DECIMAL128,              ///< Fixed-point type with __int128_t
  STRUCT,                  ///< Struct elements
  // `NUM_TYPE_IDS` must be last!
  NUM_TYPE_IDS  ///< Total number of type ids
};
234 
241 class data_type {
242  public:
243  data_type() = default;
244  ~data_type() = default;
245  data_type(data_type const&) = default;
246  data_type(data_type&&) = default;
247 
253  data_type& operator=(data_type const&) = default;
254 
261 
267  explicit constexpr data_type(type_id id) : _id{id} {}
268 
275  explicit data_type(type_id id, int32_t scale) : _id{id}, _fixed_point_scale{scale}
276  {
277  assert(id == type_id::DECIMAL32 || id == type_id::DECIMAL64 || id == type_id::DECIMAL128);
278  }
279 
285  [[nodiscard]] constexpr type_id id() const noexcept { return _id; }
286 
292  [[nodiscard]] constexpr int32_t scale() const noexcept { return _fixed_point_scale; }
293 
294  private:
295  type_id _id{type_id::EMPTY};
296 
297  // Below is additional type specific metadata. Currently, only _fixed_point_scale is stored.
298 
299  int32_t _fixed_point_scale{}; // numeric::scale_type not available here, use int32_t
300 };
301 
314 constexpr bool operator==(data_type const& lhs, data_type const& rhs)
315 {
316  // use std::tie in the future, breaks JITIFY currently
317  return lhs.id() == rhs.id() && lhs.scale() == rhs.scale();
318 }
319 
332 inline bool operator!=(data_type const& lhs, data_type const& rhs) { return !(lhs == rhs); }
333 
344 std::size_t size_of(data_type t);
345 
347 } // namespace cudf
Indicator for the logical data type of an element in a column.
Definition: types.hpp:241
data_type & operator=(data_type &&)=default
Move assignment operator for data_type.
data_type(data_type &&)=default
Move constructor.
constexpr int32_t scale() const noexcept
Returns the scale (for fixed_point types)
Definition: types.hpp:292
data_type(type_id id, int32_t scale)
Construct a new data_type object for numeric::fixed_point
Definition: types.hpp:275
data_type & operator=(data_type const &)=default
Copy assignment operator for data_type.
constexpr type_id id() const noexcept
Returns the type identifier.
Definition: types.hpp:285
data_type(data_type const &)=default
Copy constructor.
constexpr data_type(type_id id)
Construct a new data_type object.
Definition: types.hpp:267
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:157
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:149
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:93
null_policy
Enum to specify whether to include nulls or exclude nulls.
Definition: types.hpp:124
uint32_t bitmask_type
Bitmask type stored as 32-bit unsigned integer.
Definition: types.hpp:94
size_type distance(T f, T l)
Similar to std::distance but returns cudf::size_type and performs static_cast
Definition: types.hpp:108
constexpr bool operator==(data_type const &lhs, data_type const &rhs)
Compares two data_type objects for equality.
Definition: types.hpp:314
mask_state
Controls the allocation/initialization of a null mask.
Definition: types.hpp:179
std::size_t size_of(data_type t)
Returns the size in bytes of elements of the specified data_type
int64_t thread_index_type
Thread index type in kernels.
Definition: types.hpp:96
nan_policy
Enum to treat NaN floating point value as null or non-null element.
Definition: types.hpp:132
order
Indicates the order in which elements should be sorted.
Definition: types.hpp:116
bool operator!=(data_type const &lhs, data_type const &rhs)
Compares two data_type objects for inequality.
Definition: types.hpp:332
uint8_t valid_type
Valid type in host memory.
Definition: types.hpp:95
interpolation
Interpolation method to use when the desired quantile lies between two data points i and j.
Definition: types.hpp:190
sorted
Indicates whether a collection of values is known to be sorted.
Definition: types.hpp:165
type_id
Identifies a column's logical element type.
Definition: types.hpp:201
nan_equality
Enum to consider different elements (of floating point types) holding NaN value as equal or unequal.
Definition: types.hpp:141
uint32_t char_utf8
UTF-8 characters are 1-4 bytes.
Definition: string_view.hpp:31
@ BEFORE
NULL values ordered before all other values.
@ AFTER
NULL values ordered after all other values.
@ EQUAL
nulls compare equal
@ UNEQUAL
nulls compare unequal
@ INCLUDE
include null elements
@ EXCLUDE
exclude null elements
@ ALL_VALID
Null mask allocated, initialized to all elements valid.
@ UNALLOCATED
Null mask not allocated (all elements are valid).
@ ALL_NULL
Null mask allocated, initialized to all elements NULL.
@ UNINITIALIZED
Null mask allocated, but not initialized.
@ NAN_IS_VALID
Treat NaNs as valid (non-null) elements.
@ NAN_IS_NULL
Treat NaNs as null elements.
@ ASCENDING
Elements ordered from small to large.
@ DESCENDING
Elements ordered from large to small.
@ HIGHER
Higher data point (j)
@ LOWER
Lower data point (i)
@ LINEAR
Linear interpolation between i and j.
@ NEAREST
i or j, whichever is nearest
@ BOOL8
Boolean using one byte per value, 0 == false, else true.
@ FLOAT64
8 byte floating point
@ UINT32
4 byte unsigned integer
@ LIST
List elements.
@ DURATION_MILLISECONDS
time interval of milliseconds in int64
@ NUM_TYPE_IDS
Total number of type ids.
@ UINT16
2 byte unsigned integer
@ INT64
8 byte signed integer
@ DECIMAL128
Fixed-point type with __int128_t.
@ INT16
2 byte signed integer
@ TIMESTAMP_MILLISECONDS
point in time in milliseconds since Unix Epoch in int64
@ DURATION_NANOSECONDS
time interval of nanoseconds in int64
@ STRING
String elements.
@ INT32
4 byte signed integer
@ DURATION_DAYS
time interval of days in int32
@ UINT64
8 byte unsigned integer
@ TIMESTAMP_MICROSECONDS
point in time in microseconds since Unix Epoch in int64
@ DURATION_SECONDS
time interval of seconds in int64
@ DURATION_MICROSECONDS
time interval of microseconds in int64
@ FLOAT32
4 byte floating point
@ STRUCT
Struct elements.
@ EMPTY
Always null with no underlying data.
@ TIMESTAMP_SECONDS
point in time in seconds since Unix Epoch in int64
@ TIMESTAMP_NANOSECONDS
point in time in nanoseconds since Unix Epoch in int64
@ TIMESTAMP_DAYS
point in time in days since Unix Epoch in int32
@ DECIMAL64
Fixed-point type with int64_t.
@ DECIMAL32
Fixed-point type with int32_t.
@ UINT8
1 byte unsigned integer
@ INT8
1 byte signed integer
@ DICTIONARY32
Dictionary type using int32 indices.
@ UNEQUAL
All NaNs compare unequal (IEEE754 behavior)
@ ALL_EQUAL
All NaNs compare equal, regardless of sign.
cuDF interfaces
Definition: aggregation.hpp:34
Indicates how a collection of values has been ordered.
Definition: types.hpp:170
order ordering
Indicates the order in which the values are sorted.
Definition: types.hpp:172
null_order null_ordering
Indicates how null values compare against all other values.
Definition: types.hpp:173
sorted is_sorted
Indicates whether the collection is sorted.
Definition: types.hpp:171