row_operators.cuh
1 /*
2  * Copyright (c) 2019-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
20 #include <cudf/detail/utilities/assert.cuh>
21 #include <cudf/hashing/detail/hash_functions.cuh>
22 #include <cudf/hashing/detail/hashing.hpp>
26 
27 #include <cuda/std/limits>
28 #include <thrust/equal.h>
29 #include <thrust/execution_policy.h>
30 #include <thrust/iterator/counting_iterator.h>
31 #include <thrust/transform_reduce.h>
32 
33 namespace CUDF_EXPORT cudf {
34 
43 enum class weak_ordering {
44  LESS,
45  EQUIVALENT,
46  GREATER
47 };
48 
49 namespace detail {
58 template <typename Element>
59 __device__ weak_ordering compare_elements(Element lhs, Element rhs)
60 {
61  if (lhs < rhs) {
62  return weak_ordering::LESS;
63  } else if (rhs < lhs) {
64  return weak_ordering::GREATER;
65  }
66  return weak_ordering::EQUIVALENT;
67 }
68 } // namespace detail
69 
79 template <typename Element>
80 __device__ weak_ordering relational_compare(Element lhs, Element rhs)
81  requires(std::is_floating_point_v<Element>)
82 {
83  if (isnan(lhs) and isnan(rhs)) {
84  return weak_ordering::EQUIVALENT;
85  } else if (isnan(rhs)) {
86  return weak_ordering::LESS;
87  } else if (isnan(lhs)) {
88  return weak_ordering::GREATER;
89  }
90 
91  return detail::compare_elements(lhs, rhs);
92 }
93 
102 inline __device__ auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
103 {
104  if (lhs_is_null and rhs_is_null) { // null <? null
105  return weak_ordering::EQUIVALENT;
106  } else if (lhs_is_null) { // null <? x
107  return (null_precedence == null_order::BEFORE) ? weak_ordering::LESS : weak_ordering::GREATER;
108  } else if (rhs_is_null) { // x <? null
109  return (null_precedence == null_order::AFTER) ? weak_ordering::LESS : weak_ordering::GREATER;
110  }
111  return weak_ordering::EQUIVALENT;
112 }
113 
122 template <typename Element>
123 __device__ weak_ordering relational_compare(Element lhs, Element rhs)
124  requires(not std::is_floating_point_v<Element>)
125 {
126  return detail::compare_elements(lhs, rhs);
127 }
128 
137 template <typename Element>
138 __device__ bool equality_compare(Element lhs, Element rhs)
139  requires(std::is_floating_point_v<Element>)
140 {
141  if (isnan(lhs) and isnan(rhs)) { return true; }
142  return lhs == rhs;
143 }
144 
153 template <typename Element>
154 __device__ bool equality_compare(Element const lhs, Element const rhs)
155  requires(not std::is_floating_point_v<Element>)
156 {
157  return lhs == rhs;
158 }
159 
165 template <typename Nullate>
167  public:
179  __host__ __device__
181  column_device_view lhs,
182  column_device_view rhs,
183  null_equality nulls_are_equal = null_equality::EQUAL)
184  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal}
185  {
186  }
187 
195  template <typename Element>
196  __device__ bool operator()(size_type lhs_element_index,
197  size_type rhs_element_index) const noexcept
198  requires(cudf::is_equality_comparable<Element, Element>())
199  {
200  if (nulls) {
201  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
202  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
203  if (lhs_is_null and rhs_is_null) {
204  return nulls_are_equal == null_equality::EQUAL;
205  } else if (lhs_is_null != rhs_is_null) {
206  return false;
207  }
208  }
209 
210  return equality_compare(lhs.element<Element>(lhs_element_index),
211  rhs.element<Element>(rhs_element_index));
212  }
213 
214  // @cond
215  template <typename Element>
216  __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index)
217  requires(not cudf::is_equality_comparable<Element, Element>())
218  {
219  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
220  }
221  // @endcond
222 
223  private:
224  column_device_view lhs;
225  column_device_view rhs;
226  Nullate nulls;
227  null_equality nulls_are_equal;
228 };
229 
235 template <typename Nullate>
237  public:
247  table_device_view lhs,
248  table_device_view rhs,
249  null_equality nulls_are_equal = null_equality::EQUAL)
250  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal}
251  {
252  CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Mismatched number of columns.");
253  }
254 
262  __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept
263  {
264  auto equal_elements = [lhs_row_index, rhs_row_index, this](column_device_view l,
265  column_device_view r) {
266  return cudf::type_dispatcher(l.type(),
267  element_equality_comparator{nulls, l, r, nulls_are_equal},
268  lhs_row_index,
269  rhs_row_index);
270  };
271 
272  return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements);
273  }
274 
275  private:
276  table_device_view lhs;
277  table_device_view rhs;
278  Nullate nulls;
279  null_equality nulls_are_equal;
280 };
281 
287 template <typename Nullate>
289  public:
301  __host__ __device__ element_relational_comparator(Nullate has_nulls,
302  column_device_view lhs,
303  column_device_view rhs,
304  null_order null_precedence)
305  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, null_precedence{null_precedence}
306  {
307  }
308 
317  __host__ __device__ element_relational_comparator(Nullate has_nulls,
318  column_device_view lhs,
319  column_device_view rhs)
320  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}
321  {
322  }
323 
332  template <typename Element>
333  __device__ weak_ordering operator()(size_type lhs_element_index,
334  size_type rhs_element_index) const noexcept
335  requires(cudf::is_relationally_comparable<Element, Element>())
336  {
337  if (nulls) {
338  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
339  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
340 
341  if (lhs_is_null or rhs_is_null) { // at least one is null
342  return null_compare(lhs_is_null, rhs_is_null, null_precedence);
343  }
344  }
345 
346  return relational_compare(lhs.element<Element>(lhs_element_index),
347  rhs.element<Element>(rhs_element_index));
348  }
349 
350  // @cond
351  template <typename Element>
352  __device__ weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index)
353  requires(not cudf::is_relationally_comparable<Element, Element>())
354  {
355  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
356  }
357  // @endcond
358 
359  private:
360  column_device_view lhs;
361  column_device_view rhs;
362  Nullate nulls;
363  null_order null_precedence{};
364 };
365 
381 template <typename Nullate>
383  public:
404  table_device_view lhs,
405  table_device_view rhs,
406  order const* column_order = nullptr,
407  null_order const* null_precedence = nullptr)
408  : _lhs{lhs},
409  _rhs{rhs},
410  _nulls{has_nulls},
411  _column_order{column_order},
412  _null_precedence{null_precedence}
413  {
414  CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns.");
415  }
416 
426  __device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept
427  {
428  for (size_type i = 0; i < _lhs.num_columns(); ++i) {
429  bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING);
430 
431  null_order null_precedence =
432  _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i];
433 
434  auto comparator =
435  element_relational_comparator{_nulls, _lhs.column(i), _rhs.column(i), null_precedence};
436 
437  weak_ordering state =
438  cudf::type_dispatcher(_lhs.column(i).type(), comparator, lhs_index, rhs_index);
439 
440  if (state == weak_ordering::EQUIVALENT) { continue; }
441 
442  return state == (ascending ? weak_ordering::LESS : weak_ordering::GREATER);
443  }
444  return false;
445  }
446 
447  private:
448  table_device_view _lhs;
449  table_device_view _rhs;
450  Nullate _nulls{};
451  null_order const* _null_precedence{};
452  order const* _column_order{};
453 }; // class row_lexicographic_comparator
454 
461 template <template <typename> class hash_function, typename Nullate>
463  public:
472  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
473  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
474  {
475  if (has_nulls && col.is_null(row_index)) {
476  return cuda::std::numeric_limits<hash_value_type>::max();
477  }
478  return hash_function<T>{}(col.element<T>(row_index));
479  }
480 
489  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
490  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
491  {
492  CUDF_UNREACHABLE("Unsupported type in hash.");
493  }
494 
495  Nullate has_nulls;
496 };
497 
504 template <template <typename> class hash_function, typename Nullate>
506  public:
513  __device__ element_hasher_with_seed(Nullate has_nulls, uint32_t seed)
514  : _seed{seed}, _has_nulls{has_nulls}
515  {
516  }
517 
525  __device__ element_hasher_with_seed(Nullate has_nulls, uint32_t seed, hash_value_type null_hash)
526  : _seed{seed}, _null_hash{null_hash}, _has_nulls{has_nulls}
527  {
528  }
529 
538  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
539  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
540  {
541  if (_has_nulls && col.is_null(row_index)) { return _null_hash; }
542  return hash_function<T>{_seed}(col.element<T>(row_index));
543  }
544 
553  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
554  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
555  {
556  CUDF_UNREACHABLE("Unsupported type in hash.");
557  }
558 
559  private:
560  uint32_t _seed{DEFAULT_HASH_SEED};
561  hash_value_type _null_hash{cuda::std::numeric_limits<hash_value_type>::max()};
562  Nullate _has_nulls;
563 };
564 
571 template <template <typename> class hash_function, typename Nullate>
572 class row_hasher {
573  public:
574  row_hasher() = delete;
575 
583  : _table{t}, _has_nulls{has_nulls}
584  {
585  }
594  : _table{t}, _seed(seed), _has_nulls{has_nulls}
595  {
596  }
597 
604  __device__ auto operator()(size_type row_index) const
605  {
606  // Hash the first column w/ the seed
607  auto const initial_hash = cudf::hashing::detail::hash_combine(
608  hash_value_type{0},
609  type_dispatcher<dispatch_storage_type>(
610  _table.column(0).type(),
612  _table.column(0),
613  row_index));
614 
615  // Hashes an element in a column
616  auto hasher = [row_index, this](size_type column_index) {
617  return cudf::type_dispatcher<dispatch_storage_type>(
618  _table.column(column_index).type(),
620  _table.column(column_index),
621  row_index);
622  };
623 
624  // Hash each element and combine all the hash values together
625  return thrust::transform_reduce(
626  thrust::seq,
627  // note that this starts at 1 and not 0 now since we already hashed the first column
628  thrust::make_counting_iterator(1),
629  thrust::make_counting_iterator(_table.num_columns()),
630  hasher,
631  initial_hash,
632  [](hash_value_type lhs, hash_value_type rhs) {
633  return cudf::hashing::detail::hash_combine(lhs, rhs);
634  });
635  }
636 
637  private:
638  table_device_view _table;
639  Nullate _has_nulls;
640  uint32_t _seed{DEFAULT_HASH_SEED};
641 };
642 
643 } // namespace CUDF_EXPORT cudf
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
T element(size_type element_index) const noexcept
Returns a copy of the element at the specified index.
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
size_type num_columns() const noexcept
Returns the number of columns.
Performs an equality comparison between two elements in two columns.
bool operator()(size_type lhs_element_index, size_type rhs_element_index) const noexcept requires(cudf
Compares the specified elements for equality.
element_equality_comparator(Nullate has_nulls, column_device_view lhs, column_device_view rhs, null_equality nulls_are_equal=null_equality::EQUAL)
Construct type-dispatched function object for comparing equality between two elements.
Function object for computing the hash value of a row in a column.
element_hasher_with_seed(Nullate has_nulls, uint32_t seed)
Constructs a function object for hashing an element in the given column.
element_hasher_with_seed(Nullate has_nulls, uint32_t seed, hash_value_type null_hash)
Constructs a function object for hashing an element in the given column.
Computes the hash value of an element in the given column.
Nullate has_nulls
A cudf::nullate type describing how to check for nulls.
Performs a relational comparison between two elements in two columns.
element_relational_comparator(Nullate has_nulls, column_device_view lhs, column_device_view rhs)
Construct type-dispatched function object for performing a relational comparison between two elements...
weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index) const noexcept requires(cudf
Performs a relational comparison between the specified elements.
element_relational_comparator(Nullate has_nulls, column_device_view lhs, column_device_view rhs, null_order null_precedence)
Construct type-dispatched function object for performing a relational comparison between two elements...
Performs a relational comparison between two elements in two tables.
row_equality_comparator(Nullate has_nulls, table_device_view lhs, table_device_view rhs, null_equality nulls_are_equal=null_equality::EQUAL)
Construct a new row equality comparator object.
bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept
Compares the specified rows for equality.
Computes the hash value of a row in the given table.
CUDF_HOST_DEVICE row_hasher(Nullate has_nulls, table_device_view t, uint32_t seed)
Constructs a row_hasher object with a seed value.
CUDF_HOST_DEVICE row_hasher(Nullate has_nulls, table_device_view t)
Constructs a row_hasher object.
auto operator()(size_type row_index) const
Computes the hash value of the row at row_index in the table
Computes whether one row is lexicographically less than another row.
bool operator()(size_type lhs_index, size_type rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less than the row at ...
row_lexicographic_comparator(Nullate has_nulls, table_device_view lhs, table_device_view rhs, order const *column_order=nullptr, null_order const *null_precedence=nullptr)
Construct a function object for performing a lexicographic comparison between the rows of two tables.
Table device view that is usable in device memory.
Column device view class definitions.
uint32_t hash_value_type
Type of hash value.
Definition: hashing.hpp:29
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:154
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:159
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:151
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
order
Indicates the order in which elements should be sorted.
Definition: types.hpp:118
cuDF interfaces
Definition: host_udf.hpp:37
bool has_nulls(table_view const &view)
Returns True if the table has nulls in any of its columns.
bool equality_compare(Element lhs, Element rhs) requires(std
A specialization for floating-point Element type to check if lhs is equivalent to rhs....
weak_ordering relational_compare(Element lhs, Element rhs) requires(std
A specialization for floating-point Element type relational comparison to derive the order of the ele...
weak_ordering
Result type of the element_relational_comparator function object.
@ EQUIVALENT
Indicates a is ordered neither before nor after b
auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Compare the nulls according to null order.
Table device view class definitions.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:32