types.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
// Function-annotation macros. Under a CUDA compiler (`__CUDACC__` defined) they expand to the
// CUDA execution-space qualifiers; for any other compiler they reduce to host-only equivalents
// so that the same declarations compile everywhere.
#if defined(__CUDACC__)
/// Indicates that the function or method is usable on host and device
#define CUDF_HOST_DEVICE __host__ __device__
/// Marks a CUDA kernel; `static` gives the kernel internal linkage
#define CUDF_KERNEL __global__ static
#else
/// Indicates that the function or method is usable on host and device
#define CUDF_HOST_DEVICE
/// Marks a CUDA kernel; `static` gives the kernel internal linkage
#define CUDF_KERNEL static
#endif
38 
39 #include <cudf/utilities/export.hpp>
40 
41 #include <cassert>
42 #include <cstddef>
43 #include <cstdint>
44 #include <iterator>
45 
51 // Forward declarations
// Only the name `rmm::device_buffer` is introduced here; this file includes no RMM header.
namespace rmm {
class device_buffer;
}  // namespace rmm
58 
59 namespace CUDF_EXPORT cudf {
// Forward declarations. Only the names are introduced here; none of these types is defined in
// this file.

// Column and element-view types
class column;
class column_view;
class mutable_column_view;
class string_view;
class list_view;
class struct_view;

// Table types
class table;
class table_view;
class mutable_table_view;

// Scalar types
class scalar;
class list_scalar;
class struct_scalar;
class string_scalar;
template <typename T>
class numeric_scalar;
template <typename T>
class fixed_point_scalar;
template <typename T>
class timestamp_scalar;
template <typename T>
class duration_scalar;

// Scalar device-view types
class string_scalar_device_view;
template <typename T>
class numeric_scalar_device_view;
template <typename T>
class fixed_point_scalar_device_view;
template <typename T>
class timestamp_scalar_device_view;
template <typename T>
class duration_scalar_device_view;
88 
using size_type         = int32_t;   ///< Row index type for columns and tables
using bitmask_type      = uint32_t;  ///< Bitmask type stored as 32-bit unsigned integer
using valid_type        = uint8_t;   ///< Valid type in host memory
using thread_index_type = int64_t;   ///< Thread index type in kernels
using char_utf8         = uint32_t;  ///< UTF-8 characters are 1-4 bytes

/**
 * @brief Similar to `std::distance` but returns `cudf::size_type` and performs `static_cast`
 *
 * The iterator difference computed by `std::distance` is narrowed to the 32-bit `size_type`
 * with a `static_cast`; no range check is performed.
 *
 * @tparam T Iterator type
 * @param f "first" iterator
 * @param l "last" iterator
 * @return The distance from `f` to `l`, converted to `size_type`
 */
template <typename T>
size_type distance(T f, T l)
{
  return static_cast<size_type>(std::distance(f, l));
}
114 
/**
 * @brief Indicates the order in which elements should be sorted.
 */
enum class order : bool {
  ASCENDING,  ///< Elements ordered from small to large
  DESCENDING  ///< Elements ordered from large to small
};
122 
/**
 * @brief Enum to specify whether to include nulls or exclude nulls.
 */
enum class null_policy : bool {
  EXCLUDE,  ///< exclude null elements
  INCLUDE   ///< include null elements
};
130 
/**
 * @brief Enum to treat NaN floating point value as null or non-null element.
 */
enum class nan_policy : bool {
  NAN_IS_NULL,  ///< treat nans as null elements
  NAN_IS_VALID  ///< treat nans as valid elements (non-null)
};
138 
/**
 * @brief Enum to consider different elements (of floating point types) holding NaN value as
 * equal or unequal.
 */
enum class nan_equality /*unspecified*/ {
  ALL_EQUAL,  ///< All NaNs compare equal, regardless of sign
  UNEQUAL     ///< All NaNs compare unequal (IEEE754 behavior)
};
147 
/**
 * @brief Enum to consider two nulls as equal or unequal.
 */
enum class null_equality : bool {
  EQUAL,   ///< two nulls are considered equal
  UNEQUAL  ///< two nulls are considered unequal
};
155 
/**
 * @brief Indicates how null values compare against all other values.
 */
enum class null_order : bool {
  AFTER,  ///< NULL values ordered after all other values
  BEFORE  ///< NULL values ordered before all other values
};
163 
/**
 * @brief Indicates whether a collection of values is known to be sorted.
 */
enum class sorted : bool {
  NO,  ///< the collection is not known to be sorted
  YES  ///< the collection is known to be sorted
};
168 
172 struct order_info {
176 };
177 
/**
 * @brief Controls the allocation/initialization of a null mask.
 */
enum class mask_state : int32_t {
  UNALLOCATED,    ///< Null mask not allocated, (all elements are valid)
  UNINITIALIZED,  ///< Null mask allocated, but not initialized
  ALL_VALID,      ///< Null mask allocated, initialized to all elements valid
  ALL_NULL        ///< Null mask allocated, initialized to all elements NULL
};
187 
/**
 * @brief Interpolation method to use when the desired quantile lies between
 * two data points i and j.
 */
enum class interpolation : int32_t {
  LINEAR,    ///< Linear interpolation between i and j
  LOWER,     ///< Lower data point (i)
  HIGHER,    ///< Higher data point (j)
  MIDPOINT,  ///< Midpoint between i and j (presumably their average -- confirm)
  NEAREST    ///< i or j, whichever is nearest
};
199 
/**
 * @brief Identifies a column's logical element type.
 *
 * Enumerator values are assigned implicitly, so the declaration order is significant:
 * inserting or removing an enumerator changes the value of every one after it.
 */
enum class type_id : int32_t {
  EMPTY,                   ///< Always null with no underlying data
  INT8,                    ///< 1 byte signed integer
  INT16,                   ///< 2 byte signed integer
  INT32,                   ///< 4 byte signed integer
  INT64,                   ///< 8 byte signed integer
  UINT8,                   ///< 1 byte unsigned integer
  UINT16,                  ///< 2 byte unsigned integer
  UINT32,                  ///< 4 byte unsigned integer
  UINT64,                  ///< 8 byte unsigned integer
  FLOAT32,                 ///< 4 byte floating point
  FLOAT64,                 ///< 8 byte floating point
  BOOL8,                   ///< Boolean using one byte per value, 0 == false, else true
  TIMESTAMP_DAYS,          ///< point in time in days since Unix Epoch in int32
  TIMESTAMP_SECONDS,       ///< point in time in seconds since Unix Epoch in int64
  TIMESTAMP_MILLISECONDS,  ///< point in time in milliseconds since Unix Epoch in int64
  TIMESTAMP_MICROSECONDS,  ///< point in time in microseconds since Unix Epoch in int64
  TIMESTAMP_NANOSECONDS,   ///< point in time in nanoseconds since Unix Epoch in int64
  DURATION_DAYS,           ///< time interval of days in int32
  DURATION_SECONDS,        ///< time interval of seconds in int64
  DURATION_MILLISECONDS,   ///< time interval of milliseconds in int64
  DURATION_MICROSECONDS,   ///< time interval of microseconds in int64
  DURATION_NANOSECONDS,    ///< time interval of nanoseconds in int64
  DICTIONARY32,            ///< Dictionary type using int32 indices
  STRING,                  ///< String elements
  LIST,                    ///< List elements
  DECIMAL32,               ///< Fixed-point type with int32_t
  DECIMAL64,               ///< Fixed-point type with int64_t
  DECIMAL128,              ///< Fixed-point type with __int128_t
  STRUCT,                  ///< Struct elements
  // `NUM_TYPE_IDS` must be last!
  NUM_TYPE_IDS  ///< Total number of type ids
};
236 
243 class data_type {
244  public:
245  data_type() = default;
246  ~data_type() = default;
247  data_type(data_type const&) = default;
248  data_type(data_type&&) = default;
249 
255  data_type& operator=(data_type const&) = default;
256 
263 
269  CUDF_HOST_DEVICE explicit constexpr data_type(type_id id) : _id{id} {}
270 
277  explicit data_type(type_id id, int32_t scale) : _id{id}, _fixed_point_scale{scale}
278  {
279  assert(id == type_id::DECIMAL32 || id == type_id::DECIMAL64 || id == type_id::DECIMAL128);
280  }
281 
287  [[nodiscard]] CUDF_HOST_DEVICE constexpr type_id id() const noexcept { return _id; }
288 
294  [[nodiscard]] CUDF_HOST_DEVICE constexpr int32_t scale() const noexcept
295  {
296  return _fixed_point_scale;
297  }
298 
299  private:
300  type_id _id{type_id::EMPTY};
301 
302  // Below is additional type specific metadata. Currently, only _fixed_point_scale is stored.
303 
304  int32_t _fixed_point_scale{}; // numeric::scale_type not available here, use int32_t
305 };
306 
319 constexpr bool operator==(data_type const& lhs, data_type const& rhs)
320 {
321  // use std::tie in the future, breaks JITIFY currently
322  return lhs.id() == rhs.id() && lhs.scale() == rhs.scale();
323 }
324 
337 inline bool operator!=(data_type const& lhs, data_type const& rhs) { return !(lhs == rhs); }
338 
/**
 * @brief Returns the size in bytes of elements of the specified `data_type`
 *
 * Only the declaration appears in this header; the definition is provided elsewhere.
 *
 * @param t The `data_type` to query
 * @return Size in bytes of an element of the specified `data_type`
 */
349 std::size_t size_of(data_type t);
350 
352 } // namespace CUDF_EXPORT cudf
Indicator for the logical data type of an element in a column.
Definition: types.hpp:243
data_type & operator=(data_type &&)=default
Move assignment operator for data_type.
data_type(data_type &&)=default
Move constructor.
constexpr CUDF_HOST_DEVICE type_id id() const noexcept
Returns the type identifier.
Definition: types.hpp:287
data_type(type_id id, int32_t scale)
Construct a new data_type object for numeric::fixed_point
Definition: types.hpp:277
data_type & operator=(data_type const &)=default
Copy assignment operator for data_type.
data_type(data_type const &)=default
Copy constructor.
constexpr CUDF_HOST_DEVICE data_type(type_id id)
Construct a new data_type object.
Definition: types.hpp:269
constexpr CUDF_HOST_DEVICE int32_t scale() const noexcept
Returns the scale (for fixed_point types)
Definition: types.hpp:294
@ LOWER
Lower data point (i)
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:159
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:151
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
null_policy
Enum to specify whether to include nulls or exclude nulls.
Definition: types.hpp:126
uint32_t bitmask_type
Bitmask type stored as 32-bit unsigned integer.
Definition: types.hpp:96
size_type distance(T f, T l)
Similar to std::distance but returns cudf::size_type and performs static_cast
Definition: types.hpp:110
constexpr bool operator==(data_type const &lhs, data_type const &rhs)
Compares two data_type objects for equality.
Definition: types.hpp:319
mask_state
Controls the allocation/initialization of a null mask.
Definition: types.hpp:181
std::size_t size_of(data_type t)
Returns the size in bytes of elements of the specified data_type
int64_t thread_index_type
Thread index type in kernels.
Definition: types.hpp:98
nan_policy
Enum to treat NaN floating point value as null or non-null element.
Definition: types.hpp:134
order
Indicates the order in which elements should be sorted.
Definition: types.hpp:118
bool operator!=(data_type const &lhs, data_type const &rhs)
Compares two data_type objects for inequality.
Definition: types.hpp:337
uint8_t valid_type
Valid type in host memory.
Definition: types.hpp:97
interpolation
Interpolation method to use when the desired quantile lies between two data points i and j.
Definition: types.hpp:192
sorted
Indicates whether a collection of values is known to be sorted.
Definition: types.hpp:167
type_id
Identifies a column's logical element type.
Definition: types.hpp:203
nan_equality
Enum to consider different elements (of floating point types) holding NaN value as equal or unequal.
Definition: types.hpp:143
uint32_t char_utf8
UTF-8 characters are 1-4 bytes.
Definition: string_view.hpp:31
@ BEFORE
NULL values ordered before all other values.
@ AFTER
NULL values ordered after all other values.
@ INCLUDE
include null elements
@ EXCLUDE
exclude null elements
@ ALL_VALID
Null mask allocated, initialized to all elements valid.
@ UNALLOCATED
Null mask not allocated, (all elements are valid)
@ ALL_NULL
Null mask allocated, initialized to all elements NULL.
@ UNINITIALIZED
Null mask allocated, but not initialized.
@ NAN_IS_VALID
treat nans as valid elements (non-null)
@ NAN_IS_NULL
treat nans as null elements
@ ASCENDING
Elements ordered from small to large.
@ DESCENDING
Elements ordered from large to small.
@ HIGHER
Higher data point (j)
@ LINEAR
Linear interpolation between i and j.
@ NEAREST
i or j, whichever is nearest
@ BOOL8
Boolean using one byte per value, 0 == false, else true.
@ FLOAT64
8 byte floating point
@ UINT32
4 byte unsigned integer
@ DURATION_MILLISECONDS
time interval of milliseconds in int64
@ NUM_TYPE_IDS
Total number of type ids.
@ UINT16
2 byte unsigned integer
@ DECIMAL128
Fixed-point type with __int128_t.
@ INT16
2 byte signed integer
@ TIMESTAMP_MILLISECONDS
point in time in milliseconds since Unix Epoch in int64
@ DURATION_NANOSECONDS
time interval of nanoseconds in int64
@ DURATION_DAYS
time interval of days in int32
@ UINT64
8 byte unsigned integer
@ TIMESTAMP_MICROSECONDS
point in time in microseconds since Unix Epoch in int64
@ DURATION_SECONDS
time interval of seconds in int64
@ DURATION_MICROSECONDS
time interval of microseconds in int64
@ FLOAT32
4 byte floating point
@ EMPTY
Always null with no underlying data.
@ TIMESTAMP_SECONDS
point in time in seconds since Unix Epoch in int64
@ TIMESTAMP_NANOSECONDS
point in time in nanoseconds since Unix Epoch in int64
@ TIMESTAMP_DAYS
point in time in days since Unix Epoch in int32
@ DECIMAL64
Fixed-point type with int64_t.
@ DECIMAL32
Fixed-point type with int32_t.
@ UINT8
1 byte unsigned integer
@ INT8
1 byte signed integer
@ DICTIONARY32
Dictionary type using int32 indices.
@ UNEQUAL
All NaNs compare unequal (IEEE754 behavior)
@ ALL_EQUAL
All NaNs compare equal, regardless of sign.
cuDF interfaces
Definition: host_udf.hpp:39
Indicates how a collection of values has been ordered.
Definition: types.hpp:172
order ordering
Indicates the order in which the values are sorted.
Definition: types.hpp:174
null_order null_ordering
Indicates how null values compare against all other values.
Definition: types.hpp:175
sorted is_sorted
Indicates whether the collection is sorted.
Definition: types.hpp:173
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:32