libcudf: types.hpp Source File

 /*

  * Copyright (c) 2018-2024, NVIDIA CORPORATION.

  *

  * Licensed under the Apache License, Version 2.0 (the "License");

  * you may not use this file except in compliance with the License.

  * You may obtain a copy of the License at

  *

  *     http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing, software

  * distributed under the License is distributed on an "AS IS" BASIS,

  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  * See the License for the specific language governing permissions and

  * limitations under the License.

  */


 #pragma once


 // Function-qualifier macros. Their expansion depends on whether `__CUDACC__` is
 // defined, so the same declarations can be used with or without the CUDA qualifiers.
 #ifdef __CUDACC__

 // `__CUDACC__` defined: expands to the `__host__ __device__` qualifiers.
 #define CUDF_HOST_DEVICE __host__ __device__

 // `__CUDACC__` defined: expands to `__global__ static`.
 #define CUDF_KERNEL __global__ static

 #else

 // `__CUDACC__` not defined: expands to nothing.
 #define CUDF_HOST_DEVICE

 // `__CUDACC__` not defined: expands to plain `static`.
 #define CUDF_KERNEL static

 #endif


 #include <cassert>

 #include <cstddef>

 #include <cstdint>

 #include <iterator>


 // Forward declarations

 // Declares `rmm::device_buffer` as an incomplete type so it can be named
 // (e.g. by pointer or reference) without its full definition being visible here.
 namespace rmm {

 class device_buffer;


 }  // namespace rmm


 namespace cudf {

 // Forward declarations: every name below is introduced as an incomplete type only;
 // none of them is defined in this header.

 // Column, view and scalar base types.
 class column;

 class column_view;

 class mutable_column_view;

 class string_view;

 class list_view;

 class struct_view;

 class scalar;


 // clang-format off

 // Concrete scalar types; the templated ones are parameterised on a single type `T`.
 class list_scalar;

 class struct_scalar;

 class string_scalar;

 template <typename T> class numeric_scalar;

 template <typename T> class fixed_point_scalar;

 template <typename T> class timestamp_scalar;

 template <typename T> class duration_scalar;


 // `*_device_view` counterparts of the scalar types declared above.
 class string_scalar_device_view;

 template <typename T> class numeric_scalar_device_view;

 template <typename T> class fixed_point_scalar_device_view;

 template <typename T> class timestamp_scalar_device_view;

 template <typename T> class duration_scalar_device_view;

 // clang-format on


 // Table types.
 class table;

 class table_view;

 class mutable_table_view;


 /// Row index type for columns and tables.
 using size_type         = int32_t;

 /// Bitmask type stored as 32-bit unsigned integer.
 using bitmask_type      = uint32_t;

 /// Valid type in host memory.
 using valid_type        = uint8_t;

 /// Thread index type in kernels.
 using thread_index_type = int64_t;

 /// UTF-8 characters are 1-4 bytes.
 using char_utf8         = uint32_t;


 /**
  * @brief Similar to `std::distance` but returns `cudf::size_type` and performs a `static_cast`.
  *
  * The value produced by `std::distance(f, l)` is converted to `size_type` with a plain
  * `static_cast`; no range check is performed on the conversion.
  *
  * @tparam T Iterator type
  * @param f Iterator marking the start of the range
  * @param l Iterator marking the end of the range
  * @return The distance from `f` to `l`, as a `size_type`
  */
 template <typename T>
 size_type distance(T f, T l)
 {
   auto const num_steps = std::distance(f, l);
   return static_cast<size_type>(num_steps);
 }


 /// Indicates the order in which elements should be sorted.
 enum class order : bool {
   ASCENDING  = false,  ///< Elements ordered from small to large
   DESCENDING = true    ///< Elements ordered from large to small
 };


 /// Enum to specify whether to include nulls or exclude nulls.
 enum class null_policy : bool {
   EXCLUDE = false,  ///< exclude null elements
   INCLUDE = true    ///< include null elements
 };


 /// Enum to treat NaN floating point value as null or non-null element.
 enum class nan_policy : bool {
   NAN_IS_NULL  = false,  ///< treat nans as null elements
   NAN_IS_VALID = true    ///< treat nans as valid elements (non-null)
 };


 /// Enum to consider different elements (of floating point types) holding NaN value as equal or
 /// unequal. No underlying type is spelled out, so the language default for scoped enums applies.
 enum class nan_equality /*unspecified*/ {
   ALL_EQUAL = 0,  ///< All NaNs compare equal, regardless of sign
   UNEQUAL   = 1   ///< All NaNs compare unequal (IEEE754 behavior)
 };


 /// Enum to consider two nulls as equal or unequal.
 enum class null_equality : bool {
   EQUAL   = false,  ///< nulls compare equal
   UNEQUAL = true    ///< nulls compare unequal
 };


 /// Indicates how null values compare against all other values.
 enum class null_order : bool {
   AFTER  = false,  ///< NULL values ordered after all other values
   BEFORE = true    ///< NULL values ordered before all other values
 };


 /// Indicates whether a collection of values is known to be sorted.
 enum class sorted : bool {
   NO  = false,
   YES = true
 };


 /// Indicates how a collection of values has been ordered.
 struct order_info {

   /// Indicates whether the collection is sorted.
   sorted is_sorted;

   /// Indicates the order in which the values are sorted.
   order ordering;

   /// Indicates how null values compare against all other values.
   null_order null_ordering;

 };


 /// Controls the allocation/initialization of a null mask.
 enum class mask_state : int32_t {
   UNALLOCATED   = 0,  ///< Null mask not allocated (all elements are valid)
   UNINITIALIZED = 1,  ///< Null mask allocated, but not initialized
   ALL_VALID     = 2,  ///< Null mask allocated, initialized to all elements valid
   ALL_NULL      = 3   ///< Null mask allocated, initialized to all elements NULL
 };


 /// Interpolation method to use when the desired quantile lies between two data points i and j.
 enum class interpolation : int32_t {
   LINEAR   = 0,  ///< Linear interpolation between i and j
   LOWER    = 1,  ///< Lower data point (i)
   HIGHER   = 2,  ///< Higher data point (j)
   MIDPOINT = 3,  ///< (i + j)/2
   NEAREST  = 4   ///< i or j, whichever is nearest
 };


 /// Identifies a column's logical element type.
 enum class type_id : int32_t {
   EMPTY = 0,  ///< Always null with no underlying data

   // Signed and unsigned integers
   INT8,    ///< 1 byte signed integer
   INT16,   ///< 2 byte signed integer
   INT32,   ///< 4 byte signed integer
   INT64,   ///< 8 byte signed integer
   UINT8,   ///< 1 byte unsigned integer
   UINT16,  ///< 2 byte unsigned integer
   UINT32,  ///< 4 byte unsigned integer
   UINT64,  ///< 8 byte unsigned integer

   // Floating point and boolean
   FLOAT32,  ///< 4 byte floating point
   FLOAT64,  ///< 8 byte floating point
   BOOL8,    ///< Boolean using one byte per value, 0 == false, else true

   // Timestamps
   TIMESTAMP_DAYS,          ///< point in time in days since Unix Epoch in int32
   TIMESTAMP_SECONDS,       ///< point in time in seconds since Unix Epoch in int64
   TIMESTAMP_MILLISECONDS,  ///< point in time in milliseconds since Unix Epoch in int64
   TIMESTAMP_MICROSECONDS,  ///< point in time in microseconds since Unix Epoch in int64
   TIMESTAMP_NANOSECONDS,   ///< point in time in nanoseconds since Unix Epoch in int64

   // Durations
   DURATION_DAYS,          ///< time interval of days in int32
   DURATION_SECONDS,       ///< time interval of seconds in int64
   DURATION_MILLISECONDS,  ///< time interval of milliseconds in int64
   DURATION_MICROSECONDS,  ///< time interval of microseconds in int64
   DURATION_NANOSECONDS,   ///< time interval of nanoseconds in int64

   // Dictionary, string, list, fixed-point and struct types
   DICTIONARY32,  ///< Dictionary type using int32 indices
   STRING,        ///< String elements
   LIST,          ///< List elements
   DECIMAL32,     ///< Fixed-point type with int32_t
   DECIMAL64,     ///< Fixed-point type with int64_t
   DECIMAL128,    ///< Fixed-point type with __int128_t
   STRUCT,        ///< Struct elements

   // `NUM_TYPE_IDS` must be last!
   NUM_TYPE_IDS  ///< Total number of type ids
 };


 /**
  * @brief Indicator for the logical data type of an element in a column.
  *
  * Stores a `type_id` together with a scale value. The scale is only set through the
  * two-argument constructor; every other construction path leaves it value-initialized.
  */
 class data_type {
  public:
   /// Default constructor: the id is `type_id::EMPTY` and the scale is zero.
   data_type() = default;

   /// Copy constructor.
   data_type(data_type const&) = default;

   /// Move constructor.
   data_type(data_type&&) = default;

   /// Copy assignment operator for data_type.
   data_type& operator=(data_type const&) = default;

   /// Move assignment operator for data_type.
   data_type& operator=(data_type&&) = default;

   /// Destructor.
   ~data_type() = default;

   /**
    * @brief Construct a new data_type object.
    *
    * @param id The type's identifier
    */
   explicit constexpr data_type(type_id id) : _id{id} {}

   /**
    * @brief Construct a new data_type object for numeric::fixed_point.
    *
    * Asserts that `id` is one of `DECIMAL32`, `DECIMAL64` or `DECIMAL128`.
    *
    * @param id The fixed_point type's identifier
    * @param scale The fixed_point scale
    */
   explicit data_type(type_id id, int32_t scale) : _id{id}, _fixed_point_scale{scale}
   {
     assert(id == type_id::DECIMAL32 || id == type_id::DECIMAL64 || id == type_id::DECIMAL128);
   }

   /**
    * @brief Returns the type identifier.
    *
    * @return The stored `type_id`
    */
   [[nodiscard]] constexpr type_id id() const noexcept
   {
     return _id;
   }

   /**
    * @brief Returns the scale (for fixed_point types).
    *
    * @return The stored scale value
    */
   [[nodiscard]] constexpr int32_t scale() const noexcept
   {
     return _fixed_point_scale;
   }

  private:
   type_id _id{type_id::EMPTY};  // identifier of the logical element type

   // Additional type-specific metadata lives below; at present that is only the
   // fixed-point scale. numeric::scale_type is not available here, hence int32_t.
   int32_t _fixed_point_scale{};
 };


 /**
  * @brief Compares two data_type objects for equality.
  *
  * Two objects are equal when both their ids and their scales match.
  *
  * @param lhs The first data_type to compare
  * @param rhs The second data_type to compare
  * @return true if `lhs` is equal to `rhs`
  */
 constexpr bool operator==(data_type const& lhs, data_type const& rhs)
 {
   // use std::tie in the future, breaks JITIFY currently
   if (lhs.id() != rhs.id()) { return false; }
   return lhs.scale() == rhs.scale();
 }


 /**
  * @brief Compares two data_type objects for inequality.
  *
  * Defined as the negation of `operator==`. Declared `constexpr` to match `operator==`, so
  * that `a != b` can be used in constant expressions wherever `a == b` can; a `constexpr`
  * function is implicitly inline, so linkage is the same as with the former `inline`.
  *
  * @param lhs The first data_type to compare
  * @param rhs The second data_type to compare
  * @return true if `lhs` is not equal to `rhs`
  */
 constexpr bool operator!=(data_type const& lhs, data_type const& rhs) { return !(lhs == rhs); }


 /// Returns the size in bytes of elements of the specified data_type.
 /// Declaration only; no definition appears in this header.
 std::size_t size_of(data_type t);


 }  // namespace cudf

cudf::data_type
Indicator for the logical data type of an element in a column.
Definition: types.hpp:241

cudf::data_type::operator=
data_type & operator=(data_type &&)=default
Move assignment operator for data_type.

cudf::data_type::data_type
data_type(data_type &&)=default
Move constructor.

cudf::data_type::scale
constexpr int32_t scale() const noexcept
Returns the scale (for fixed_point types)
Definition: types.hpp:292

cudf::data_type::data_type
data_type(type_id id, int32_t scale)
Construct a new data_type object for numeric::fixed_point
Definition: types.hpp:275

cudf::data_type::operator=
data_type & operator=(data_type const &)=default
Copy assignment operator for data_type.

cudf::data_type::id
constexpr type_id id() const noexcept
Returns the type identifier.
Definition: types.hpp:285

cudf::data_type::data_type
data_type(data_type const &)=default
Copy constructor.

cudf::data_type::data_type
constexpr data_type(type_id id)
Construct a new data_type object.
Definition: types.hpp:267

cudf::null_order
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:157

cudf::null_equality
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:149

cudf::size_type
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:93

cudf::null_policy
null_policy
Enum to specify whether to include nulls or exclude nulls.
Definition: types.hpp:124

cudf::bitmask_type
uint32_t bitmask_type
Bitmask type stored as 32-bit unsigned integer.
Definition: types.hpp:94

cudf::distance
size_type distance(T f, T l)
Similar to std::distance but returns cudf::size_type and performs static_cast
Definition: types.hpp:108

cudf::operator==
constexpr bool operator==(data_type const &lhs, data_type const &rhs)
Compares two data_type objects for equality.
Definition: types.hpp:314

cudf::mask_state
mask_state
Controls the allocation/initialization of a null mask.
Definition: types.hpp:179

cudf::size_of
std::size_t size_of(data_type t)
Returns the size in bytes of elements of the specified data_type

cudf::thread_index_type
int64_t thread_index_type
Thread index type in kernels.
Definition: types.hpp:96

cudf::nan_policy
nan_policy
Enum to treat NaN floating point value as null or non-null element.
Definition: types.hpp:132

cudf::order
order
Indicates the order in which elements should be sorted.
Definition: types.hpp:116

cudf::operator!=
bool operator!=(data_type const &lhs, data_type const &rhs)
Compares two data_type objects for inequality.
Definition: types.hpp:332

cudf::valid_type
uint8_t valid_type
Valid type in host memory.
Definition: types.hpp:95

cudf::interpolation
interpolation
Interpolation method to use when the desired quantile lies between two data points i and j.
Definition: types.hpp:190

cudf::sorted
sorted
Indicates whether a collection of values is known to be sorted.
Definition: types.hpp:165

cudf::type_id
type_id
Identifies a column's logical element type.
Definition: types.hpp:201

cudf::nan_equality
nan_equality
Enum to consider different elements (of floating point types) holding NaN value as equal or unequal.
Definition: types.hpp:141

cudf::char_utf8
uint32_t char_utf8
UTF-8 characters are 1-4 bytes.
Definition: string_view.hpp:31

cudf::null_order::BEFORE
@ BEFORE
NULL values ordered before all other values.

cudf::null_order::AFTER
@ AFTER
NULL values ordered after all other values.

cudf::null_equality::EQUAL
@ EQUAL
nulls compare equal

cudf::null_equality::UNEQUAL
@ UNEQUAL
nulls compare unequal

cudf::null_policy::INCLUDE
@ INCLUDE
include null elements

cudf::null_policy::EXCLUDE
@ EXCLUDE
exclude null elements

cudf::mask_state::ALL_VALID
@ ALL_VALID
Null mask allocated, initialized to all elements valid.

cudf::mask_state::UNALLOCATED
@ UNALLOCATED
Null mask not allocated (all elements are valid).

cudf::mask_state::ALL_NULL
@ ALL_NULL
Null mask allocated, initialized to all elements NULL.

cudf::mask_state::UNINITIALIZED
@ UNINITIALIZED
Null mask allocated, but not initialized.

cudf::nan_policy::NAN_IS_VALID
@ NAN_IS_VALID
treat nans as valid elements (non-null)

cudf::nan_policy::NAN_IS_NULL
@ NAN_IS_NULL
treat nans as null elements

cudf::order::ASCENDING
@ ASCENDING
Elements ordered from small to large.

cudf::order::DESCENDING
@ DESCENDING
Elements ordered from large to small.

cudf::interpolation::HIGHER
@ HIGHER
Higher data point (j)

cudf::interpolation::LOWER
@ LOWER
Lower data point (i)

cudf::interpolation::LINEAR
@ LINEAR
Linear interpolation between i and j.

cudf::interpolation::NEAREST
@ NEAREST
i or j, whichever is nearest

cudf::interpolation::MIDPOINT
@ MIDPOINT
(i + j)/2

cudf::type_id::BOOL8
@ BOOL8
Boolean using one byte per value, 0 == false, else true.

cudf::type_id::FLOAT64
@ FLOAT64
8 byte floating point

cudf::type_id::UINT32
@ UINT32
4 byte unsigned integer

cudf::type_id::LIST
@ LIST
List elements.

cudf::type_id::DURATION_MILLISECONDS
@ DURATION_MILLISECONDS
time interval of milliseconds in int64

cudf::type_id::NUM_TYPE_IDS
@ NUM_TYPE_IDS
Total number of type ids.

cudf::type_id::UINT16
@ UINT16
2 byte unsigned integer

cudf::type_id::INT64
@ INT64
8 byte signed integer

cudf::type_id::DECIMAL128
@ DECIMAL128
Fixed-point type with __int128_t.

cudf::type_id::INT16
@ INT16
2 byte signed integer

cudf::type_id::TIMESTAMP_MILLISECONDS
@ TIMESTAMP_MILLISECONDS
point in time in milliseconds since Unix Epoch in int64

cudf::type_id::DURATION_NANOSECONDS
@ DURATION_NANOSECONDS
time interval of nanoseconds in int64

cudf::type_id::STRING
@ STRING
String elements.

cudf::type_id::INT32
@ INT32
4 byte signed integer

cudf::type_id::DURATION_DAYS
@ DURATION_DAYS
time interval of days in int32

cudf::type_id::UINT64
@ UINT64
8 byte unsigned integer

cudf::type_id::TIMESTAMP_MICROSECONDS
@ TIMESTAMP_MICROSECONDS
point in time in microseconds since Unix Epoch in int64

cudf::type_id::DURATION_SECONDS
@ DURATION_SECONDS
time interval of seconds in int64

cudf::type_id::DURATION_MICROSECONDS
@ DURATION_MICROSECONDS
time interval of microseconds in int64

cudf::type_id::FLOAT32
@ FLOAT32
4 byte floating point

cudf::type_id::STRUCT
@ STRUCT
Struct elements.

cudf::type_id::EMPTY
@ EMPTY
Always null with no underlying data.

cudf::type_id::TIMESTAMP_SECONDS
@ TIMESTAMP_SECONDS
point in time in seconds since Unix Epoch in int64

cudf::type_id::TIMESTAMP_NANOSECONDS
@ TIMESTAMP_NANOSECONDS
point in time in nanoseconds since Unix Epoch in int64

cudf::type_id::TIMESTAMP_DAYS
@ TIMESTAMP_DAYS
point in time in days since Unix Epoch in int32

cudf::type_id::DECIMAL64
@ DECIMAL64
Fixed-point type with int64_t.

cudf::type_id::DECIMAL32
@ DECIMAL32
Fixed-point type with int32_t.

cudf::type_id::UINT8
@ UINT8
1 byte unsigned integer

cudf::type_id::INT8
@ INT8
1 byte signed integer

cudf::type_id::DICTIONARY32
@ DICTIONARY32
Dictionary type using int32 indices.

cudf::nan_equality::UNEQUAL
@ UNEQUAL
All NaNs compare unequal (IEEE754 behavior)

cudf::nan_equality::ALL_EQUAL
@ ALL_EQUAL
All NaNs compare equal, regardless of sign.

cudf
cuDF interfaces
Definition: aggregation.hpp:34

cudf::order_info
Indicates how a collection of values has been ordered.
Definition: types.hpp:170

cudf::order_info::ordering
order ordering
Indicates the order in which the values are sorted.
Definition: types.hpp:172

cudf::order_info::null_ordering
null_order null_ordering
Indicates how null values compare against all other values.
Definition: types.hpp:173

cudf::order_info::is_sorted
sorted is_sorted
Indicates whether the collection is sorted.
Definition: types.hpp:171