primitive_row_operators.cuh
1 /*
2  * Copyright (c) 2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
20 #include <cudf/detail/utilities/assert.cuh>
21 #include <cudf/hashing/detail/hash_functions.cuh>
22 #include <cudf/hashing/detail/hashing.hpp>
23 #include <cudf/table/experimental/row_operators.cuh>
24 #include <cudf/table/row_operators.cuh>
28 
29 #include <cuda/std/limits>
30 #include <cuda/std/type_traits>
31 
32 namespace CUDF_EXPORT cudf {
33 
44 
45 namespace row::primitive {
46 
50 template <typename T>
51 using primitive_type_t = cuda::std::conditional_t<cudf::is_numeric<T>(), T, void>;
52 
56 template <cudf::type_id Id>
58  using type = primitive_type_t<id_to_type<Id>>;
59 };
60 
65  public:
75  template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
76  __device__ bool operator()(column_device_view const& lhs,
77  column_device_view const& rhs,
78  size_type lhs_element_index,
79  size_type rhs_element_index) const
80  {
81  return cudf::equality_compare(lhs.element<Element>(lhs_element_index),
82  rhs.element<Element>(rhs_element_index));
83  }
84 
85  // @cond
86  template <typename Element, CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>())>
87  __device__ bool operator()(column_device_view const&,
88  column_device_view const&,
89  size_type,
90  size_type) const
91  {
92  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
93  }
94  // @endcond
95 };
96 
101  public:
112  std::shared_ptr<cudf::experimental::row::equality::preprocessed_table> lhs,
113  std::shared_ptr<cudf::experimental::row::equality::preprocessed_table> rhs,
114  null_equality nulls_are_equal)
115  : _has_nulls{has_nulls}, _lhs{*lhs}, _rhs{*rhs}, _nulls_are_equal{nulls_are_equal}
116  {
117  CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns.");
118  }
119 
127  __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const
128  {
129  if (_has_nulls) {
130  bool const lhs_is_null{_lhs.column(0).is_null(lhs_row_index)};
131  bool const rhs_is_null{_rhs.column(0).is_null(rhs_row_index)};
132  if (lhs_is_null and rhs_is_null) {
133  return _nulls_are_equal == null_equality::EQUAL;
134  } else if (lhs_is_null != rhs_is_null) {
135  return false;
136  }
137  }
138  return cudf::type_dispatcher<dispatch_primitive_type>(_lhs.begin()->type(),
140  _lhs.column(0),
141  _rhs.column(0),
142  lhs_row_index,
143  rhs_row_index);
144  }
145 
146  private:
147  cudf::nullate::DYNAMIC _has_nulls;
148  table_device_view _lhs;
149  table_device_view _rhs;
150  null_equality _nulls_are_equal;
151 };
152 
158 template <template <typename> class Hash>
160  public:
170  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
171  __device__ hash_value_type operator()(hash_value_type seed,
172  column_device_view const& col,
173  size_type row_index) const
174  {
175  return Hash<T>{seed}(col.element<T>(row_index));
176  }
177 
178  // @cond
179  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
180  __device__ hash_value_type operator()(hash_value_type, column_device_view const&, size_type) const
181  {
182  CUDF_UNREACHABLE("Unsupported type in hash.");
183  }
184  // @endcond
185 };
186 
192 template <template <typename> class Hash = cudf::hashing::detail::default_hash>
193 class row_hasher {
194  public:
195  row_hasher() = delete;
196 
206  hash_value_type seed = DEFAULT_HASH_SEED)
207  : _has_nulls{has_nulls}, _table{t}, _seed{seed}
208  {
209  }
210 
219  std::shared_ptr<cudf::experimental::row::equality::preprocessed_table> t,
220  hash_value_type seed = DEFAULT_HASH_SEED)
221  : _has_nulls{has_nulls}, _table{*t}, _seed{seed}
222  {
223  }
224 
231  __device__ auto operator()(size_type row_index) const
232  {
233  if (_has_nulls && _table.column(0).is_null(row_index)) {
234  return cuda::std::numeric_limits<hash_value_type>::max();
235  }
236  return cudf::type_dispatcher<dispatch_primitive_type>(
237  _table.column(0).type(), element_hasher<Hash>{}, _seed, _table.column(0), row_index);
238  }
239 
240  private:
241  cudf::nullate::DYNAMIC _has_nulls;
242  table_device_view _table;
243  hash_value_type _seed;
244 };
245 
246 } // namespace row::primitive
247 } // namespace CUDF_EXPORT cudf
An immutable, non-owning view of device data as a column of elements that is trivially copyable and usable in CUDA device code.
T element(size_type element_index) const noexcept
Returns a copy of the element at the specified index.
Performs an equality comparison between two elements in two columns.
Function object for computing the hash value of a row in a column.
Performs an equality comparison between two rows in two tables.
row_equality_comparator(cudf::nullate::DYNAMIC const &has_nulls, std::shared_ptr< cudf::experimental::row::equality::preprocessed_table > lhs, std::shared_ptr< cudf::experimental::row::equality::preprocessed_table > rhs, null_equality nulls_are_equal)
Construct a new row equality comparator object.
bool operator()(size_type lhs_row_index, size_type rhs_row_index) const
Compares the specified rows for equality.
Computes the hash value of a row in the given table.
auto operator()(size_type row_index) const
Computes the hash value of the row at row_index in the table
row_hasher(cudf::nullate::DYNAMIC const &has_nulls, table_device_view t, hash_value_type seed=DEFAULT_HASH_SEED)
Constructs a row_hasher object with a seed value.
row_hasher(cudf::nullate::DYNAMIC const &has_nulls, std::shared_ptr< cudf::experimental::row::equality::preprocessed_table > t, hash_value_type seed=DEFAULT_HASH_SEED)
Constructs a row_hasher object with a seed value.
Table device view that is usable in device memory.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
Column device view class definitions.
uint32_t hash_value_type
Type of hash value.
Definition: hashing.hpp:29
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:154
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:151
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
cuDF interfaces
Definition: host_udf.hpp:37
bool is_primitive_row_op_compatible(cudf::table_view const &table)
Checks if a table is compatible with primitive row operations.
bool has_nulls(table_view const &view)
Returns True if the table has nulls in any of its columns.
bool equality_compare(Element lhs, Element rhs) requires(std
A specialization for floating-point Element type to check if lhs is equivalent to rhs....
nullate::DYNAMIC defers the determination of nullability to run time rather than compile time....
Custom dispatcher for primitive types.
primitive_type_t< id_to_type< Id > > type
The underlying type.
Table device view class definitions.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.