types.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2021, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
// Function decorators that remain valid whether or not this header is being
// compiled by a CUDA compiler (detected through `__CUDACC__`).
#ifdef __CUDACC__
// CUDA compilation: attach the execution-space specifiers plus `inline`.
#define CUDA_HOST_DEVICE_CALLABLE __host__ __device__ inline
#define CUDA_DEVICE_CALLABLE __device__ inline
#else
// Non-CUDA compilation: the specifiers do not exist, so both macros reduce to
// plain `inline`.
#define CUDA_HOST_DEVICE_CALLABLE inline
#define CUDA_DEVICE_CALLABLE inline
#endif
26 
27 #include <cassert>
28 #include <cstddef>
29 #include <cstdint>
30 #include <iterator>
31 
/**
 * @brief Namespace holding the bit-mask element type.
 */
namespace bit_mask {
/// 32-bit unsigned word; presumably one word of a validity bitmask (confirm
/// against users of this alias).
using bit_mask_t = uint32_t;
}  // namespace bit_mask
40 
// Forward declarations
//
// Only the names are introduced here; the definitions live outside this
// header, so these types can be used here through pointers/references only.
namespace rmm {
class device_buffer;
namespace mr {
class device_memory_resource;
// Declared here without a definition; returns a pointer to a
// `device_memory_resource`.
device_memory_resource* get_current_device_resource();
}  // namespace mr

}  // namespace rmm
50 
51 namespace cudf {
// Forward declaration
//
// The types below are only named here (no definitions), which lets other
// declarations refer to them by pointer or reference without pulling in the
// headers that define them.

// Column and element view types.
class column;
class column_view;
class mutable_column_view;
class string_view;
class list_view;
class struct_view;

// Scalar base type.
class scalar;

// clang-format off
// Concrete scalar types.
class list_scalar;
class string_scalar;
template <typename T> class numeric_scalar;
template <typename T> class fixed_point_scalar;
template <typename T> class timestamp_scalar;
template <typename T> class duration_scalar;

// Device-view counterparts of the scalar types.
class string_scalar_device_view;
template <typename T> class numeric_scalar_device_view;
template <typename T> class fixed_point_scalar_device_view;
template <typename T> class timestamp_scalar_device_view;
template <typename T> class duration_scalar_device_view;
// clang-format on

class struct_scalar;

// Table types.
class table;
class table_view;
class mutable_table_view;
82 
// Fundamental integer aliases used by the declarations in this header.
using size_type    = int32_t;   ///< Signed 32-bit size/index type (returned by `distance`)
using bitmask_type = uint32_t;  ///< Unsigned 32-bit word; presumably one bitmask word
using valid_type   = uint8_t;   ///< Unsigned 8-bit type; presumably a per-element validity flag
using offset_type  = int32_t;   ///< Signed 32-bit type; presumably an offset into child data

/**
 * @brief Similar to `std::distance` but returns `cudf::size_type` and performs
 * `static_cast`.
 *
 * @note The result of `std::distance` is narrowed with `static_cast` without
 * any range check, so the caller must ensure it fits in `size_type`.
 *
 * @tparam T Iterator type
 * @param f "first" iterator
 * @param l "last" iterator
 * @return The distance between `f` and `l`, as a `size_type`
 */
template <typename T>
size_type distance(T f, T l)
{
  return static_cast<size_type>(std::distance(f, l));
}

/**
 * @brief Sentinel null-count value (`-1`).
 *
 * Presumably signals that a null count has not been computed yet; confirm
 * against the code that consumes it.
 */
static constexpr size_type UNKNOWN_NULL_COUNT{-1};
115 
/**
 * @brief Indicates the order in which elements should be sorted.
 *
 * Stored as `bool`: `ASCENDING` is `false`, `DESCENDING` is `true`.
 */
enum class order : bool {
  ASCENDING,  ///< Elements ordered from small to large
  DESCENDING  ///< Counterpart of `ASCENDING` (presumably large to small)
};
123 
/**
 * @brief Enum to specify whether to include nulls or exclude nulls.
 *
 * Stored as `bool`: `EXCLUDE` is `false`, `INCLUDE` is `true`.
 */
enum class null_policy : bool {
  EXCLUDE,  ///< exclude null elements
  INCLUDE   ///< Counterpart of `EXCLUDE` (presumably include null elements)
};
131 
/**
 * @brief Enum to treat NaN floating point value as null or non-null element.
 *
 * Stored as `bool`: `NAN_IS_NULL` is `false`, `NAN_IS_VALID` is `true`.
 */
enum class nan_policy : bool {
  NAN_IS_NULL,  ///< treat nans as null elements
  NAN_IS_VALID  ///< Counterpart of `NAN_IS_NULL` (presumably nans are valid elements)
};
139 
/**
 * @brief Enum to consider different elements (of floating point types) holding
 * NaN value as equal or unequal.
 *
 * Unlike the neighbouring policy enums, no underlying type is spelled out, so
 * the language default for scoped enums (`int`) applies.
 */
enum class nan_equality /*unspecified*/ {
  ALL_EQUAL,  ///< All NaNs compare equal, regardless of sign
  UNEQUAL     ///< Counterpart of `ALL_EQUAL` (presumably NaNs compare unequal)
};
148 
/**
 * @brief Selects whether null elements compare equal to each other.
 *
 * Stored as `bool`: `EQUAL` is `false`, `UNEQUAL` is `true`.
 */
enum class null_equality : bool {
  EQUAL,   ///< nulls compare equal
  UNEQUAL  ///< Counterpart of `EQUAL` (presumably nulls compare unequal)
};
156 
/**
 * @brief Indicates how null values compare against all other values.
 *
 * Stored as `bool`: `AFTER` is `false`, `BEFORE` is `true`.
 */
enum class null_order : bool {
  AFTER,  ///< NULL values ordered after all other values
  BEFORE  ///< Counterpart of `AFTER` (presumably NULL values ordered first)
};
164 
/**
 * @brief Indicates whether a collection of values is known to be sorted.
 *
 * Stored as `bool`: `NO` is `false`, `YES` is `true`.
 */
enum class sorted : bool { NO, YES };
169 
173 struct order_info {
174  sorted is_sorted;
175  order ordering;
176  null_order null_ordering;
177 };
178 
/**
 * @brief Controls the allocation/initialization of a null mask.
 *
 * Enumerators take the values 0 through 3 in declaration order.
 */
enum class mask_state : int32_t {
  UNALLOCATED,    ///< Null mask not allocated (all elements are valid)
  UNINITIALIZED,  ///< Presumably: allocated, contents not initialized
  ALL_VALID,      ///< Presumably: allocated, every element marked valid
  ALL_NULL        ///< Presumably: allocated, every element marked null
};
188 
/**
 * @brief Interpolation method to use when the desired quantile lies between
 * two data points i and j.
 *
 * Enumerators take the values 0 through 4 in declaration order. Only `LINEAR`
 * is described by the available documentation; the exact behaviour of the
 * remaining methods should be confirmed against the quantile implementation.
 */
enum class interpolation : int32_t {
  LINEAR,    ///< Linear interpolation between i and j
  LOWER,
  HIGHER,
  MIDPOINT,
  NEAREST
};
200 
/**
 * @brief Identifies a column's logical element type.
 *
 * Enumerators are numbered consecutively from 0. `NUM_TYPE_IDS` is kept last
 * so that its value equals the number of real type ids declared before it.
 */
enum class type_id : int32_t {
  EMPTY,  ///< Always null with no underlying data
  INT8,
  INT16,
  INT32,
  INT64,
  UINT8,
  UINT16,
  UINT32,
  UINT64,
  FLOAT32,
  FLOAT64,
  BOOL8,
  TIMESTAMP_DAYS,
  TIMESTAMP_SECONDS,
  TIMESTAMP_MILLISECONDS,
  TIMESTAMP_MICROSECONDS,
  TIMESTAMP_NANOSECONDS,
  DURATION_DAYS,
  DURATION_SECONDS,
  DURATION_MILLISECONDS,
  DURATION_MICROSECONDS,
  DURATION_NANOSECONDS,
  DICTIONARY32,
  STRING,
  LIST,
  DECIMAL32,
  DECIMAL64,
  STRUCT,
  // `NUM_TYPE_IDS` must be last!
  NUM_TYPE_IDS
};

/**
 * @brief Indicator for the logical data type of an element in a column.
 *
 * Holds a `type_id` plus type-specific metadata; currently the only metadata
 * is the fixed-point scale. A default-constructed object has id
 * `type_id::EMPTY` and scale `0`.
 */
class data_type {
 public:
  data_type()                 = default;
  ~data_type()                = default;
  data_type(data_type const&) = default;
  data_type(data_type&&)      = default;
  data_type& operator=(data_type const&) = default;
  data_type& operator=(data_type&&) = default;

  /**
   * @brief Construct a new data_type object.
   *
   * The scale is left at its default of `0`.
   *
   * @param id The type's identifier
   */
  explicit constexpr data_type(type_id id) : _id{id} {}

  /**
   * @brief Construct a new data_type object for `numeric::fixed_point`.
   *
   * @param id The `fixed_point`'s identifier; must be `type_id::DECIMAL32` or
   * `type_id::DECIMAL64` (checked with `assert`, so only when assertions are
   * enabled)
   * @param scale The `fixed_point`'s scale
   */
  explicit data_type(type_id id, int32_t scale) : _id{id}, _fixed_point_scale{scale}
  {
    assert(id == type_id::DECIMAL32 || id == type_id::DECIMAL64);
  }

  /**
   * @brief Returns the type identifier.
   */
  constexpr type_id id() const noexcept { return _id; }

  /**
   * @brief Returns the scale (for fixed_point types).
   */
  constexpr int32_t scale() const noexcept { return _fixed_point_scale; }

 private:
  type_id _id{type_id::EMPTY};

  // Below is additional type specific metadata. Currently, only _fixed_point_scale is stored.

  int32_t _fixed_point_scale{};  // numeric::scale_type not available here, use int32_t
};

/**
 * @brief Compares two `data_type` objects for equality.
 *
 * Two objects are equal when both their ids and their scales are equal.
 *
 * @param lhs The first `data_type` to compare
 * @param rhs The second `data_type` to compare
 * @return true `lhs` is equal to `rhs`
 * @return false `lhs` is not equal to `rhs`
 */
constexpr bool operator==(data_type const& lhs, data_type const& rhs)
{
  // use std::tie in the future, breaks JITIFY currently
  return lhs.id() == rhs.id() && lhs.scale() == rhs.scale();
}

/**
 * @brief Compares two `data_type` objects for inequality.
 *
 * Declared `constexpr` (which implies `inline`) to match `operator==`, so that
 * both comparisons are usable in constant expressions.
 *
 * @param lhs The first `data_type` to compare
 * @param rhs The second `data_type` to compare
 * @return true `lhs` is not equal to `rhs`
 * @return false `lhs` is equal to `rhs`
 */
constexpr bool operator!=(data_type const& lhs, data_type const& rhs) { return !(lhs == rhs); }

/**
 * @brief Returns the size in bytes of elements of the specified `data_type`.
 *
 * Declaration only; the definition is not part of this header.
 *
 * @param t The `data_type` to get the size of
 * @return Size in bytes of an element of the specified `data_type`
 */
std::size_t size_of(data_type t);
330 
/**
 * @brief Identifies the hash function to be used.
 *
 * No underlying type is spelled out, so the language default for scoped enums
 * (`int`) applies; enumerators take the values 0 through 3.
 */
enum class hash_id {
  HASH_IDENTITY = 0,    ///< Identity hash function that simply returns the key to be hashed
  HASH_MURMUR3,         ///< Presumably the Murmur3 hash function
  HASH_MD5,             ///< Presumably the MD5 hash function
  HASH_SERIAL_MURMUR3,  ///< Presumably a serial variant of Murmur3 (confirm with implementation)
};
341 
/**
 * @brief Default hash seed value (`0`).
 *
 * Presumably used by hashing APIs when the caller supplies no seed; confirm
 * against the call sites.
 */
static constexpr uint32_t DEFAULT_HASH_SEED = 0;
346 
348 } // namespace cudf
cudf::hash_id
hash_id
Identifies the hash function to be used.
Definition: types.hpp:334
cudf::nan_policy
nan_policy
Enum to treat NaN floating point value as null or non-null element.
Definition: types.hpp:135
cudf::data_type::id
constexpr type_id id() const noexcept
Returns the type identifier.
Definition: types.hpp:273
cudf::strings::LOWER
@ LOWER
all lower case characters
Definition: char_types.hpp:46
cudf::interpolation::LINEAR
@ LINEAR
Linear interpolation between i and j.
cudf::null_policy
null_policy
Enum to specify whether to include nulls or exclude nulls.
Definition: types.hpp:127
cudf::type_id
type_id
Identifies a column's logical element type.
Definition: types.hpp:204
cudf::interpolation
interpolation
Interpolation method to use when the desired quantile lies between two data points i and j.
Definition: types.hpp:193
cudf::data_type::data_type
data_type(type_id id, int32_t scale)
Construct a new data_type object for numeric::fixed_point
Definition: types.hpp:265
cudf::data_type::scale
constexpr int32_t scale() const noexcept
Returns the scale (for fixed_point types)
Definition: types.hpp:278
cudf::size_of
std::size_t size_of(data_type t)
Returns the size in bytes of elements of the specified data_type
cudf::null_order
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:160
cudf::order::ASCENDING
@ ASCENDING
Elements ordered from small to large.
cudf::data_type::data_type
constexpr data_type(type_id id)
Construct a new data_type object.
Definition: types.hpp:257
cudf::mask_state
mask_state
Controls the allocation/initialization of a null mask.
Definition: types.hpp:182
cudf::nan_equality
nan_equality
Enum to consider different elements (of floating point types) holding NaN value as equal or unequal.
Definition: types.hpp:144
cudf::nan_equality::ALL_EQUAL
@ ALL_EQUAL
All NaNs compare equal, regardless of sign.
cudf::null_order::AFTER
@ AFTER
NULL values ordered after all other values.
cudf::order_info
Indicates how a collection of values has been ordered.
Definition: types.hpp:173
cudf::hash_id::HASH_IDENTITY
@ HASH_IDENTITY
Identity hash function that simply returns the key to be hashed.
cudf::mask_state::UNALLOCATED
@ UNALLOCATED
Null mask not allocated (all elements are valid)
cudf::data_type
Indicator for the logical data type of an element in a column.
Definition: types.hpp:243
cudf
cuDF interfaces
Definition: aggregation.hpp:34
cudf::nan_policy::NAN_IS_NULL
@ NAN_IS_NULL
treat nans as null elements
cudf::sorted
sorted
Indicates whether a collection of values is known to be sorted.
Definition: types.hpp:168
cudf::null_policy::EXCLUDE
@ EXCLUDE
exclude null elements
cudf::null_equality::EQUAL
@ EQUAL
nulls compare equal
cudf::operator==
constexpr bool operator==(data_type const &lhs, data_type const &rhs)
Compares two data_type objects for equality.
Definition: types.hpp:300
cudf::null_equality
null_equality
Definition: types.hpp:152
cudf::distance
size_type distance(T f, T l)
Similar to std::distance but returns cudf::size_type and performs static_cast
Definition: types.hpp:103
cudf::operator!=
bool operator!=(data_type const &lhs, data_type const &rhs)
Compares two data_type objects for inequality.
Definition: types.hpp:318
cudf::type_id::EMPTY
@ EMPTY
Always null with no underlying data.
get_current_device_resource
device_memory_resource * get_current_device_resource()
cudf::order
order
Indicates the order in which elements should be sorted.
Definition: types.hpp:119