row_operators.cuh
1 /*
2  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
20 #include <cudf/detail/utilities/assert.cuh>
21 #include <cudf/hashing/detail/hash_functions.cuh>
22 #include <cudf/hashing/detail/hashing.hpp>
26 
27 #include <cuda/std/limits>
28 #include <thrust/equal.h>
29 #include <thrust/execution_policy.h>
30 #include <thrust/iterator/counting_iterator.h>
31 #include <thrust/transform_reduce.h>
32 
33 namespace cudf {
34 
43 enum class weak_ordering {
44  LESS,
45  EQUIVALENT,
46  GREATER
47 };
48 
49 namespace detail {
58 template <typename Element>
59 __device__ weak_ordering compare_elements(Element lhs, Element rhs)
60 {
61  if (lhs < rhs) {
62  return weak_ordering::LESS;
63  } else if (rhs < lhs) {
65  }
67 }
68 } // namespace detail
69 
79 template <typename Element, std::enable_if_t<std::is_floating_point_v<Element>>* = nullptr>
80 __device__ weak_ordering relational_compare(Element lhs, Element rhs)
81 {
82  if (isnan(lhs) and isnan(rhs)) {
84  } else if (isnan(rhs)) {
85  return weak_ordering::LESS;
86  } else if (isnan(lhs)) {
88  }
89 
90  return detail::compare_elements(lhs, rhs);
91 }
92 
101 inline __device__ auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
102 {
103  if (lhs_is_null and rhs_is_null) { // null <? null
105  } else if (lhs_is_null) { // null <? x
106  return (null_precedence == null_order::BEFORE) ? weak_ordering::LESS : weak_ordering::GREATER;
107  } else if (rhs_is_null) { // x <? null
108  return (null_precedence == null_order::AFTER) ? weak_ordering::LESS : weak_ordering::GREATER;
109  }
111 }
112 
121 template <typename Element, std::enable_if_t<not std::is_floating_point_v<Element>>* = nullptr>
122 __device__ weak_ordering relational_compare(Element lhs, Element rhs)
123 {
124  return detail::compare_elements(lhs, rhs);
125 }
126 
135 template <typename Element, std::enable_if_t<std::is_floating_point_v<Element>>* = nullptr>
136 __device__ bool equality_compare(Element lhs, Element rhs)
137 {
138  if (isnan(lhs) and isnan(rhs)) { return true; }
139  return lhs == rhs;
140 }
141 
150 template <typename Element, std::enable_if_t<not std::is_floating_point_v<Element>>* = nullptr>
151 __device__ bool equality_compare(Element const lhs, Element const rhs)
152 {
153  return lhs == rhs;
154 }
155 
161 template <typename Nullate>
163  public:
175  __host__ __device__
177  column_device_view lhs,
178  column_device_view rhs,
179  null_equality nulls_are_equal = null_equality::EQUAL)
180  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal}
181  {
182  }
183 
191  template <typename Element,
192  std::enable_if_t<cudf::is_equality_comparable<Element, Element>()>* = nullptr>
193  __device__ bool operator()(size_type lhs_element_index,
194  size_type rhs_element_index) const noexcept
195  {
196  if (nulls) {
197  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
198  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
199  if (lhs_is_null and rhs_is_null) {
200  return nulls_are_equal == null_equality::EQUAL;
201  } else if (lhs_is_null != rhs_is_null) {
202  return false;
203  }
204  }
205 
206  return equality_compare(lhs.element<Element>(lhs_element_index),
207  rhs.element<Element>(rhs_element_index));
208  }
209 
210  // @cond
211  template <typename Element,
212  std::enable_if_t<not cudf::is_equality_comparable<Element, Element>()>* = nullptr>
213  __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index)
214  {
215  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
216  }
217  // @endcond
218 
219  private:
220  column_device_view lhs;
221  column_device_view rhs;
222  Nullate nulls;
223  null_equality nulls_are_equal;
224 };
225 
231 template <typename Nullate>
233  public:
243  table_device_view lhs,
244  table_device_view rhs,
245  null_equality nulls_are_equal = null_equality::EQUAL)
246  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal}
247  {
248  CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Mismatched number of columns.");
249  }
250 
258  __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept
259  {
260  auto equal_elements = [=](column_device_view l, column_device_view r) {
261  return cudf::type_dispatcher(l.type(),
262  element_equality_comparator{nulls, l, r, nulls_are_equal},
263  lhs_row_index,
264  rhs_row_index);
265  };
266 
267  return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements);
268  }
269 
270  private:
271  table_device_view lhs;
272  table_device_view rhs;
273  Nullate nulls;
274  null_equality nulls_are_equal;
275 };
276 
282 template <typename Nullate>
284  public:
296  __host__ __device__ element_relational_comparator(Nullate has_nulls,
297  column_device_view lhs,
298  column_device_view rhs,
299  null_order null_precedence)
300  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, null_precedence{null_precedence}
301  {
302  }
303 
312  __host__ __device__ element_relational_comparator(Nullate has_nulls,
313  column_device_view lhs,
314  column_device_view rhs)
315  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}
316  {
317  }
318 
327  template <typename Element,
328  std::enable_if_t<cudf::is_relationally_comparable<Element, Element>()>* = nullptr>
329  __device__ weak_ordering operator()(size_type lhs_element_index,
330  size_type rhs_element_index) const noexcept
331  {
332  if (nulls) {
333  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
334  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
335 
336  if (lhs_is_null or rhs_is_null) { // at least one is null
337  return null_compare(lhs_is_null, rhs_is_null, null_precedence);
338  }
339  }
340 
341  return relational_compare(lhs.element<Element>(lhs_element_index),
342  rhs.element<Element>(rhs_element_index));
343  }
344 
345  // @cond
346  template <typename Element,
347  std::enable_if_t<not cudf::is_relationally_comparable<Element, Element>()>* = nullptr>
348  __device__ weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index)
349  {
350  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
351  }
352  // @endcond
353 
354  private:
355  column_device_view lhs;
356  column_device_view rhs;
357  Nullate nulls;
358  null_order null_precedence{};
359 };
360 
376 template <typename Nullate>
378  public:
399  table_device_view lhs,
400  table_device_view rhs,
401  order const* column_order = nullptr,
402  null_order const* null_precedence = nullptr)
403  : _lhs{lhs},
404  _rhs{rhs},
405  _nulls{has_nulls},
406  _column_order{column_order},
407  _null_precedence{null_precedence}
408  {
409  CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns.");
410  }
411 
421  __device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept
422  {
423  for (size_type i = 0; i < _lhs.num_columns(); ++i) {
424  bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING);
425 
426  null_order null_precedence =
427  _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i];
428 
429  auto comparator =
430  element_relational_comparator{_nulls, _lhs.column(i), _rhs.column(i), null_precedence};
431 
432  weak_ordering state =
433  cudf::type_dispatcher(_lhs.column(i).type(), comparator, lhs_index, rhs_index);
434 
435  if (state == weak_ordering::EQUIVALENT) { continue; }
436 
437  return state == (ascending ? weak_ordering::LESS : weak_ordering::GREATER);
438  }
439  return false;
440  }
441 
442  private:
443  table_device_view _lhs;
444  table_device_view _rhs;
445  Nullate _nulls{};
446  null_order const* _null_precedence{};
447  order const* _column_order{};
448 }; // class row_lexicographic_comparator
449 
456 template <template <typename> class hash_function, typename Nullate>
458  public:
467  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
468  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
469  {
470  if (has_nulls && col.is_null(row_index)) {
471  return cuda::std::numeric_limits<hash_value_type>::max();
472  }
473  return hash_function<T>{}(col.element<T>(row_index));
474  }
475 
484  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
485  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
486  {
487  CUDF_UNREACHABLE("Unsupported type in hash.");
488  }
489 
490  Nullate has_nulls;
491 };
492 
499 template <template <typename> class hash_function, typename Nullate>
501  public:
508  __device__ element_hasher_with_seed(Nullate has_nulls, uint32_t seed)
509  : _seed{seed}, _has_nulls{has_nulls}
510  {
511  }
512 
520  __device__ element_hasher_with_seed(Nullate has_nulls, uint32_t seed, hash_value_type null_hash)
521  : _seed{seed}, _null_hash{null_hash}, _has_nulls{has_nulls}
522  {
523  }
524 
533  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
534  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
535  {
536  if (_has_nulls && col.is_null(row_index)) { return _null_hash; }
537  return hash_function<T>{_seed}(col.element<T>(row_index));
538  }
539 
548  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
549  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
550  {
551  CUDF_UNREACHABLE("Unsupported type in hash.");
552  }
553 
554  private:
555  uint32_t _seed{DEFAULT_HASH_SEED};
556  hash_value_type _null_hash{cuda::std::numeric_limits<hash_value_type>::max()};
557  Nullate _has_nulls;
558 };
559 
566 template <template <typename> class hash_function, typename Nullate>
567 class row_hasher {
568  public:
569  row_hasher() = delete;
570 
578  : _table{t}, _has_nulls{has_nulls}
579  {
580  }
589  : _table{t}, _seed(seed), _has_nulls{has_nulls}
590  {
591  }
592 
599  __device__ auto operator()(size_type row_index) const
600  {
601  // Hash the first column w/ the seed
602  auto const initial_hash = cudf::hashing::detail::hash_combine(
603  hash_value_type{0},
604  type_dispatcher<dispatch_storage_type>(
605  _table.column(0).type(),
607  _table.column(0),
608  row_index));
609 
610  // Hashes an element in a column
611  auto hasher = [=](size_type column_index) {
612  return cudf::type_dispatcher<dispatch_storage_type>(
613  _table.column(column_index).type(),
615  _table.column(column_index),
616  row_index);
617  };
618 
619  // Hash each element and combine all the hash values together
620  return thrust::transform_reduce(
621  thrust::seq,
622  // note that this starts at 1 and not 0 now since we already hashed the first column
623  thrust::make_counting_iterator(1),
624  thrust::make_counting_iterator(_table.num_columns()),
625  hasher,
626  initial_hash,
627  [](hash_value_type lhs, hash_value_type rhs) {
628  return cudf::hashing::detail::hash_combine(lhs, rhs);
629  });
630  }
631 
632  private:
633  table_device_view _table;
634  Nullate _has_nulls;
635  uint32_t _seed{DEFAULT_HASH_SEED};
636 };
637 
638 } // namespace cudf
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
T element(size_type element_index) const noexcept
Returns reference to element at the specified index.
CUDF_HOST_DEVICE data_type type() const noexcept
Returns the element type.
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
ColumnDeviceView const & column(size_type column_index) const noexcept
Returns a reference to the view of the specified column.
size_type num_columns() const noexcept
Returns the number of columns.
Performs an equality comparison between two elements in two columns.
element_equality_comparator(Nullate has_nulls, column_device_view lhs, column_device_view rhs, null_equality nulls_are_equal=null_equality::EQUAL)
Construct type-dispatched function object for comparing equality between two elements.
bool operator()(size_type lhs_element_index, size_type rhs_element_index) const noexcept
Compares the specified elements for equality.
Function object for computing the hash value of a row in a column.
element_hasher_with_seed(Nullate has_nulls, uint32_t seed)
Constructs a function object for hashing an element in the given column.
element_hasher_with_seed(Nullate has_nulls, uint32_t seed, hash_value_type null_hash)
Constructs a function object for hashing an element in the given column.
Computes the hash value of an element in the given column.
Nullate has_nulls
A cudf::nullate type describing how to check for nulls.
Performs a relational comparison between two elements in two columns.
weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index) const noexcept
Performs a relational comparison between the specified elements.
element_relational_comparator(Nullate has_nulls, column_device_view lhs, column_device_view rhs)
Construct type-dispatched function object for performing a relational comparison between two elements...
element_relational_comparator(Nullate has_nulls, column_device_view lhs, column_device_view rhs, null_order null_precedence)
Construct type-dispatched function object for performing a relational comparison between two elements...
Performs a relational comparison between two elements in two tables.
row_equality_comparator(Nullate has_nulls, table_device_view lhs, table_device_view rhs, null_equality nulls_are_equal=null_equality::EQUAL)
Construct a new row equality comparator object.
bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept
Compares the specified rows for equality.
Computes the hash value of a row in the given table.
CUDF_HOST_DEVICE row_hasher(Nullate has_nulls, table_device_view t, uint32_t seed)
Constructs a row_hasher object with a seed value.
CUDF_HOST_DEVICE row_hasher(Nullate has_nulls, table_device_view t)
Constructs a row_hasher object.
auto operator()(size_type row_index) const
Computes the hash value of the row at row_index in the table
Computes whether one row is lexicographically less than another row.
bool operator()(size_type lhs_index, size_type rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less than the row at ...
row_lexicographic_comparator(Nullate has_nulls, table_device_view lhs, table_device_view rhs, order const *column_order=nullptr, null_order const *null_precedence=nullptr)
Construct a function object for performing a lexicographic comparison between the rows of two tables.
Table device view that is usable in device memory.
Column device view class definitions.
uint32_t hash_value_type
Type of hash value.
Definition: hashing.hpp:35
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:177
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:157
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:149
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:93
order
Indicates the order in which elements should be sorted.
Definition: types.hpp:116
@ BEFORE
NULL values ordered before all other values.
@ AFTER
NULL values ordered after all other values.
@ EQUAL
nulls compare equal
@ ASCENDING
Elements ordered from small to large.
cuDF interfaces
Definition: aggregation.hpp:34
bool has_nulls(table_view const &view)
Returns True if the table has nulls in any of its columns.
weak_ordering
Result type of the element_relational_comparator function object.
@ LESS
Indicates a is less than (ordered before) b
@ EQUIVALENT
Indicates a is ordered neither before nor after b
@ GREATER
Indicates a is greater than (ordered after) b
auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Compare the nulls according to null order.
weak_ordering relational_compare(Element lhs, Element rhs)
A specialization for floating-point Element type relational comparison to derive the order of the ele...
bool equality_compare(Element lhs, Element rhs)
A specialization for floating-point Element type to check if lhs is equivalent to rhs....
Table device view class definitions.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:32