row_operators.cuh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
28 #pragma once
29 
31 #include <cudf/detail/row_operator/common_utils.cuh>
32 #include <cudf/detail/row_operator/row_operators.cuh>
33 #include <cudf/detail/utilities/assert.cuh>
34 #include <cudf/hashing/detail/hash_functions.cuh>
35 #include <cudf/hashing/detail/hashing.hpp>
39 
40 #include <cuda/std/limits>
41 #include <thrust/equal.h>
42 #include <thrust/execution_policy.h>
43 #include <thrust/iterator/counting_iterator.h>
44 #include <thrust/transform_reduce.h>
45 
46 namespace CUDF_EXPORT cudf {
47 
48 // Aliases for backward compatibility - these provide access to common utilities
49 // that were previously defined inline or elsewhere but are now in detail namespace
50 
58 
69 template <typename Element>
70 __device__ weak_ordering compare_elements(Element lhs, Element rhs)
71 {
72  return cudf::detail::compare_elements(lhs, rhs);
73 }
74 
85 template <typename Element>
86 __device__ weak_ordering relational_compare(Element lhs, Element rhs)
87 {
88  return cudf::detail::relational_compare(lhs, rhs);
89 }
90 
101 inline __device__ auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
102 {
103  return cudf::detail::null_compare(lhs_is_null, rhs_is_null, null_precedence);
104 }
105 
116 template <typename Element>
117 __device__ bool equality_compare(Element lhs, Element rhs)
118 {
119  return cudf::detail::equality_compare(lhs, rhs);
120 }
121 
130 template <typename Nullate>
132  public:
144  __host__ __device__
146  column_device_view lhs,
147  column_device_view rhs,
148  null_equality nulls_are_equal = null_equality::EQUAL)
149  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal}
150  {
151  }
152 
160  template <typename Element>
161  __device__ bool operator()(size_type lhs_element_index,
162  size_type rhs_element_index) const noexcept
163  requires(cudf::is_equality_comparable<Element, Element>())
164  {
165  if (nulls) {
166  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
167  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
168  if (lhs_is_null and rhs_is_null) {
169  return nulls_are_equal == null_equality::EQUAL;
170  } else if (lhs_is_null != rhs_is_null) {
171  return false;
172  }
173  }
174 
175  return cudf::detail::equality_compare(lhs.element<Element>(lhs_element_index),
176  rhs.element<Element>(rhs_element_index));
177  }
178 
179  // @cond
180  template <typename Element>
181  __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index)
182  requires(not cudf::is_equality_comparable<Element, Element>())
183  {
184  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
185  }
186  // @endcond
187 
188  private:
189  column_device_view lhs;
190  column_device_view rhs;
191  Nullate nulls;
192  null_equality nulls_are_equal;
193 };
194 
203 template <typename Nullate>
205  public:
215  table_device_view lhs,
216  table_device_view rhs,
217  null_equality nulls_are_equal = null_equality::EQUAL)
218  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal}
219  {
220  CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Mismatched number of columns.");
221  }
222 
230  __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept
231  {
232  auto equal_elements = [lhs_row_index, rhs_row_index, this](column_device_view l,
233  column_device_view r) {
234  return cudf::type_dispatcher(l.type(),
235  element_equality_comparator{nulls, l, r, nulls_are_equal},
236  lhs_row_index,
237  rhs_row_index);
238  };
239 
240  return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements);
241  }
242 
243  private:
244  table_device_view lhs;
245  table_device_view rhs;
246  Nullate nulls;
247  null_equality nulls_are_equal;
248 };
249 
258 template <typename Nullate>
260  public:
272  __host__ __device__ element_relational_comparator(Nullate has_nulls,
273  column_device_view lhs,
274  column_device_view rhs,
275  null_order null_precedence)
276  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, null_precedence{null_precedence}
277  {
278  }
279 
288  __host__ __device__ element_relational_comparator(Nullate has_nulls,
289  column_device_view lhs,
290  column_device_view rhs)
291  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}
292  {
293  }
294 
303  template <typename Element>
304  __device__ cudf::detail::weak_ordering operator()(size_type lhs_element_index,
305  size_type rhs_element_index) const noexcept
306  requires(cudf::is_relationally_comparable<Element, Element>())
307  {
308  if (nulls) {
309  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
310  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
311 
312  if (lhs_is_null or rhs_is_null) { // at least one is null
313  return cudf::detail::null_compare(lhs_is_null, rhs_is_null, null_precedence);
314  }
315  }
316 
317  return cudf::detail::relational_compare(lhs.element<Element>(lhs_element_index),
318  rhs.element<Element>(rhs_element_index));
319  }
320 
321  // @cond
322  template <typename Element>
323  __device__ cudf::detail::weak_ordering operator()(size_type lhs_element_index,
324  size_type rhs_element_index)
325  requires(not cudf::is_relationally_comparable<Element, Element>())
326  {
327  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
328  }
329  // @endcond
330 
331  private:
332  column_device_view lhs;
333  column_device_view rhs;
334  Nullate nulls;
335  null_order null_precedence{};
336 };
337 
356 template <typename Nullate>
358  public:
379  table_device_view lhs,
380  table_device_view rhs,
381  order const* column_order = nullptr,
382  null_order const* null_precedence = nullptr)
383  : _lhs{lhs},
384  _rhs{rhs},
385  _nulls{has_nulls},
386  _column_order{column_order},
387  _null_precedence{null_precedence}
388  {
389  CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns.");
390  }
391 
401  __device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept
402  {
403  for (size_type i = 0; i < _lhs.num_columns(); ++i) {
404  bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING);
405 
406  null_order null_precedence =
407  _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i];
408 
409  auto comparator =
410  element_relational_comparator{_nulls, _lhs.column(i), _rhs.column(i), null_precedence};
411 
413  cudf::type_dispatcher(_lhs.column(i).type(), comparator, lhs_index, rhs_index);
414 
415  if (state == cudf::detail::weak_ordering::EQUIVALENT) { continue; }
416 
417  return state ==
418  (ascending ? cudf::detail::weak_ordering::LESS : cudf::detail::weak_ordering::GREATER);
419  }
420  return false;
421  }
422 
423  private:
424  table_device_view _lhs;
425  table_device_view _rhs;
426  Nullate _nulls{};
427  null_order const* _null_precedence{};
428  order const* _column_order{};
429 }; // class row_lexicographic_comparator
430 
440 template <template <typename> class hash_function, typename Nullate>
442  public:
451  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
452  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
453  {
454  if (has_nulls && col.is_null(row_index)) {
455  return cuda::std::numeric_limits<hash_value_type>::max();
456  }
457  return hash_function<T>{}(col.element<T>(row_index));
458  }
459 
468  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
469  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
470  {
471  CUDF_UNREACHABLE("Unsupported type in hash.");
472  }
473 
474  Nullate has_nulls;
475 };
476 
486 template <template <typename> class hash_function, typename Nullate>
488  public:
495  __device__ element_hasher_with_seed(Nullate has_nulls, uint32_t seed)
496  : _seed{seed}, _has_nulls{has_nulls}
497  {
498  }
499 
507  __device__ element_hasher_with_seed(Nullate has_nulls, uint32_t seed, hash_value_type null_hash)
508  : _seed{seed}, _null_hash{null_hash}, _has_nulls{has_nulls}
509  {
510  }
511 
520  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
521  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
522  {
523  if (_has_nulls && col.is_null(row_index)) { return _null_hash; }
524  return hash_function<T>{_seed}(col.element<T>(row_index));
525  }
526 
535  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
536  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
537  {
538  CUDF_UNREACHABLE("Unsupported type in hash.");
539  }
540 
541  private:
542  uint32_t _seed{DEFAULT_HASH_SEED};
543  hash_value_type _null_hash{cuda::std::numeric_limits<hash_value_type>::max()};
544  Nullate _has_nulls;
545 };
546 
556 template <template <typename> class hash_function, typename Nullate>
557 class row_hasher {
558  public:
559  row_hasher() = delete;
560 
568  : _table{t}, _has_nulls{has_nulls}
569  {
570  }
579  : _table{t}, _seed(seed), _has_nulls{has_nulls}
580  {
581  }
582 
589  __device__ auto operator()(size_type row_index) const
590  {
591  // Hash the first column w/ the seed
592  auto const initial_hash = cudf::hashing::detail::hash_combine(
593  hash_value_type{0},
594  type_dispatcher<dispatch_storage_type>(
595  _table.column(0).type(),
597  _table.column(0),
598  row_index));
599 
600  // Hashes an element in a column
601  auto hasher = [row_index, this](size_type column_index) {
602  return cudf::type_dispatcher<dispatch_storage_type>(
603  _table.column(column_index).type(),
605  _table.column(column_index),
606  row_index);
607  };
608 
609  // Hash each element and combine all the hash values together
610  return thrust::transform_reduce(
611  thrust::seq,
612  // note that this starts at 1 and not 0 now since we already hashed the first column
613  thrust::make_counting_iterator(1),
614  thrust::make_counting_iterator(_table.num_columns()),
615  hasher,
616  initial_hash,
617  [](hash_value_type lhs, hash_value_type rhs) {
618  return cudf::hashing::detail::hash_combine(lhs, rhs);
619  });
620  }
621 
622  private:
623  table_device_view _table;
624  Nullate _has_nulls;
625  uint32_t _seed{DEFAULT_HASH_SEED};
626 };
627 
628 } // namespace CUDF_EXPORT cudf
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
T element(size_type element_index) const noexcept
Returns a copy of the element at the specified index.
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
size_type num_columns() const noexcept
Returns the number of columns.
Performs an equality comparison between two elements in two columns.
bool operator()(size_type lhs_element_index, size_type rhs_element_index) const noexcept requires(cudf
Compares the specified elements for equality.
element_equality_comparator(Nullate has_nulls, column_device_view lhs, column_device_view rhs, null_equality nulls_are_equal=null_equality::EQUAL)
Construct type-dispatched function object for comparing equality between two elements.
Function object for computing the hash value of a row in a column.
element_hasher_with_seed(Nullate has_nulls, uint32_t seed)
Constructs a function object for hashing an element in the given column.
element_hasher_with_seed(Nullate has_nulls, uint32_t seed, hash_value_type null_hash)
Constructs a function object for hashing an element in the given column.
Computes the hash value of an element in the given column.
Nullate has_nulls
A cudf::nullate type describing how to check for nulls.
Performs a relational comparison between two elements in two columns.
element_relational_comparator(Nullate has_nulls, column_device_view lhs, column_device_view rhs)
Construct type-dispatched function object for performing a relational comparison between two elements...
element_relational_comparator(Nullate has_nulls, column_device_view lhs, column_device_view rhs, null_order null_precedence)
Construct type-dispatched function object for performing a relational comparison between two elements...
cudf::detail::weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index) const noexcept requires(cudf
Performs a relational comparison between the specified elements.
Performs a relational comparison between two elements in two tables.
row_equality_comparator(Nullate has_nulls, table_device_view lhs, table_device_view rhs, null_equality nulls_are_equal=null_equality::EQUAL)
Construct a new row equality comparator object.
bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept
Compares the specified rows for equality.
Computes the hash value of a row in the given table.
CUDF_HOST_DEVICE row_hasher(Nullate has_nulls, table_device_view t, uint32_t seed)
Constructs a row_hasher object with a seed value.
CUDF_HOST_DEVICE row_hasher(Nullate has_nulls, table_device_view t)
Constructs a row_hasher object.
auto operator()(size_type row_index) const
Computes the hash value of the row at row_index in the table
Computes whether one row is lexicographically less than another row.
bool operator()(size_type lhs_index, size_type rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less than the row at ...
row_lexicographic_comparator(Nullate has_nulls, table_device_view lhs, table_device_view rhs, order const *column_order=nullptr, null_order const *null_precedence=nullptr)
Construct a function object for performing a lexicographic comparison between the rows of two tables.
Table device view that is usable in device memory.
Column device view class definitions.
uint32_t hash_value_type
Type of hash value.
Definition: hashing.hpp:29
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:154
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:159
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:151
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
order
Indicates the order in which elements should be sorted.
Definition: types.hpp:118
cuDF interfaces
Definition: host_udf.hpp:37
bool has_nulls(table_view const &view)
Returns True if the table has nulls in any of its columns.
weak_ordering compare_elements(Element lhs, Element rhs)
Alias for backward compatibility with legacy row operators.
bool equality_compare(Element lhs, Element rhs)
Alias for backward compatibility with legacy row operators.
auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Alias for backward compatibility with legacy row operators.
cudf::detail::weak_ordering weak_ordering
Alias for backward compatibility with legacy row operators.
weak_ordering relational_compare(Element lhs, Element rhs)
Alias for backward compatibility with legacy row operators.
Table device view class definitions.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:32