row_operators.cuh
1 /*
2  * Copyright (c) 2019-2022, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
20 #include <cudf/detail/hashing.hpp>
21 #include <cudf/detail/utilities/assert.cuh>
22 #include <cudf/detail/utilities/hash_functions.cuh>
23 #include <cudf/sorting.hpp>
27 
28 #include <thrust/equal.h>
29 #include <thrust/swap.h>
30 #include <thrust/transform_reduce.h>
31 
32 #include <limits>
33 
34 namespace cudf {
35 
44 enum class weak_ordering {
45  LESS,
46  EQUIVALENT,
47  GREATER
48 };
49 
50 namespace detail {
59 template <typename Element>
60 __device__ weak_ordering compare_elements(Element lhs, Element rhs)
61 {
62  if (lhs < rhs) {
63  return weak_ordering::LESS;
64  } else if (rhs < lhs) {
66  }
67  return weak_ordering::EQUIVALENT;
68 }
69 } // namespace detail
70 
84 template <typename Element, std::enable_if_t<std::is_floating_point_v<Element>>* = nullptr>
85 __device__ weak_ordering relational_compare(Element lhs, Element rhs)
86 {
87  if (isnan(lhs) and isnan(rhs)) {
88  return weak_ordering::EQUIVALENT;
89  } else if (isnan(rhs)) {
90  return weak_ordering::LESS;
91  } else if (isnan(lhs)) {
93  }
94 
95  return detail::compare_elements(lhs, rhs);
96 }
97 
106 inline __device__ auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
107 {
108  if (lhs_is_null and rhs_is_null) { // null <? null
109  return weak_ordering::EQUIVALENT;
110  } else if (lhs_is_null) { // null <? x
111  return (null_precedence == null_order::BEFORE) ? weak_ordering::LESS : weak_ordering::GREATER;
112  } else if (rhs_is_null) { // x <? null
113  return (null_precedence == null_order::AFTER) ? weak_ordering::LESS : weak_ordering::GREATER;
114  }
115  return weak_ordering::EQUIVALENT;
116 }
117 
127 template <typename Element, std::enable_if_t<not std::is_floating_point_v<Element>>* = nullptr>
128 __device__ weak_ordering relational_compare(Element lhs, Element rhs)
129 {
130  return detail::compare_elements(lhs, rhs);
131 }
132 
141 template <typename Element, std::enable_if_t<std::is_floating_point_v<Element>>* = nullptr>
142 __device__ bool equality_compare(Element lhs, Element rhs)
143 {
144  if (isnan(lhs) and isnan(rhs)) { return true; }
145  return lhs == rhs;
146 }
147 
156 template <typename Element, std::enable_if_t<not std::is_floating_point_v<Element>>* = nullptr>
157 __device__ bool equality_compare(Element const lhs, Element const rhs)
158 {
159  return lhs == rhs;
160 }
161 
167 template <typename Nullate>
169  public:
181  __host__ __device__
182  element_equality_comparator(Nullate has_nulls,
183  column_device_view lhs,
184  column_device_view rhs,
185  null_equality nulls_are_equal = null_equality::EQUAL)
186  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal}
187  {
188  }
189 
197  template <typename Element,
198  std::enable_if_t<cudf::is_equality_comparable<Element, Element>()>* = nullptr>
199  __device__ bool operator()(size_type lhs_element_index,
200  size_type rhs_element_index) const noexcept
201  {
202  if (nulls) {
203  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
204  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
205  if (lhs_is_null and rhs_is_null) {
206  return nulls_are_equal == null_equality::EQUAL;
207  } else if (lhs_is_null != rhs_is_null) {
208  return false;
209  }
210  }
211 
212  return equality_compare(lhs.element<Element>(lhs_element_index),
213  rhs.element<Element>(rhs_element_index));
214  }
215 
216  template <typename Element,
217  std::enable_if_t<not cudf::is_equality_comparable<Element, Element>()>* = nullptr>
218  __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index)
219  {
220  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
221  }
222 
223  private:
224  column_device_view lhs;
225  column_device_view rhs;
226  Nullate nulls;
227  null_equality nulls_are_equal;
228 };
229 
230 template <typename Nullate>
232  public:
233  row_equality_comparator(Nullate has_nulls,
234  table_device_view lhs,
235  table_device_view rhs,
236  null_equality nulls_are_equal = null_equality::EQUAL)
237  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal}
238  {
239  CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Mismatched number of columns.");
240  }
241 
242  __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept
243  {
244  auto equal_elements = [=](column_device_view l, column_device_view r) {
245  return cudf::type_dispatcher(l.type(),
246  element_equality_comparator{nulls, l, r, nulls_are_equal},
247  lhs_row_index,
248  rhs_row_index);
249  };
250 
251  return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements);
252  }
253 
254  private:
255  table_device_view lhs;
256  table_device_view rhs;
257  Nullate nulls;
258  null_equality nulls_are_equal;
259 };
260 
266 template <typename Nullate>
268  public:
280  __host__ __device__ element_relational_comparator(Nullate has_nulls,
281  column_device_view lhs,
282  column_device_view rhs,
283  null_order null_precedence)
284  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, null_precedence{null_precedence}
285  {
286  }
287 
288  __host__ __device__ element_relational_comparator(Nullate has_nulls,
289  column_device_view lhs,
290  column_device_view rhs)
291  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}
292  {
293  }
294 
303  template <typename Element,
304  std::enable_if_t<cudf::is_relationally_comparable<Element, Element>()>* = nullptr>
305  __device__ weak_ordering operator()(size_type lhs_element_index,
306  size_type rhs_element_index) const noexcept
307  {
308  if (nulls) {
309  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
310  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
311 
312  if (lhs_is_null or rhs_is_null) { // at least one is null
313  return null_compare(lhs_is_null, rhs_is_null, null_precedence);
314  }
315  }
316 
317  return relational_compare(lhs.element<Element>(lhs_element_index),
318  rhs.element<Element>(rhs_element_index));
319  }
320 
321  template <typename Element,
322  std::enable_if_t<not cudf::is_relationally_comparable<Element, Element>()>* = nullptr>
323  __device__ weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index)
324  {
325  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
326  }
327 
328  private:
329  column_device_view lhs;
330  column_device_view rhs;
331  Nullate nulls;
332  null_order null_precedence{};
333 };
334 
350 template <typename Nullate>
352  public:
372  row_lexicographic_comparator(Nullate has_nulls,
373  table_device_view lhs,
374  table_device_view rhs,
375  order const* column_order = nullptr,
376  null_order const* null_precedence = nullptr)
377  : _lhs{lhs},
378  _rhs{rhs},
379  _nulls{has_nulls},
380  _column_order{column_order},
381  _null_precedence{null_precedence}
382  {
383  CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns.");
384  }
385 
395  __device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept
396  {
397  for (size_type i = 0; i < _lhs.num_columns(); ++i) {
398  bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING);
399 
400  null_order null_precedence =
401  _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i];
402 
403  auto comparator =
404  element_relational_comparator{_nulls, _lhs.column(i), _rhs.column(i), null_precedence};
405 
406  weak_ordering state =
407  cudf::type_dispatcher(_lhs.column(i).type(), comparator, lhs_index, rhs_index);
408 
409  if (state == weak_ordering::EQUIVALENT) { continue; }
410 
411  return state == (ascending ? weak_ordering::LESS : weak_ordering::GREATER);
412  }
413  return false;
414  }
415 
416  private:
417  table_device_view _lhs;
418  table_device_view _rhs;
419  Nullate _nulls{};
420  null_order const* _null_precedence{};
421  order const* _column_order{};
422 }; // class row_lexicographic_comparator
423 
430 template <template <typename> class hash_function, typename Nullate>
432  public:
433  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
434  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
435  {
436  if (has_nulls && col.is_null(row_index)) { return std::numeric_limits<hash_value_type>::max(); }
437  return hash_function<T>{}(col.element<T>(row_index));
438  }
439 
440  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
441  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
442  {
443  CUDF_UNREACHABLE("Unsupported type in hash.");
444  }
445 
446  Nullate has_nulls;
447 };
448 
449 template <template <typename> class hash_function, typename Nullate>
451  public:
452  __device__ element_hasher_with_seed(Nullate has_nulls, uint32_t seed)
453  : _seed{seed}, _has_nulls{has_nulls}
454  {
455  }
456 
457  __device__ element_hasher_with_seed(Nullate has_nulls, uint32_t seed, hash_value_type null_hash)
458  : _seed{seed}, _null_hash{null_hash}, _has_nulls{has_nulls}
459  {
460  }
461 
462  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
463  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
464  {
465  if (_has_nulls && col.is_null(row_index)) { return _null_hash; }
466  return hash_function<T>{_seed}(col.element<T>(row_index));
467  }
468 
469  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
470  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
471  {
472  CUDF_UNREACHABLE("Unsupported type in hash.");
473  }
474 
475  private:
476  uint32_t _seed{DEFAULT_HASH_SEED};
477  hash_value_type _null_hash{std::numeric_limits<hash_value_type>::max()};
478  Nullate _has_nulls;
479 };
480 
487 template <template <typename> class hash_function, typename Nullate>
488 class row_hasher {
489  public:
490  row_hasher() = delete;
491  CUDF_HOST_DEVICE row_hasher(Nullate has_nulls, table_device_view t)
492  : _table{t}, _has_nulls{has_nulls}
493  {
494  }
495  CUDF_HOST_DEVICE row_hasher(Nullate has_nulls, table_device_view t, uint32_t seed)
496  : _table{t}, _seed(seed), _has_nulls{has_nulls}
497  {
498  }
499 
500  __device__ auto operator()(size_type row_index) const
501  {
502  // Hash the first column w/ the seed
503  auto const initial_hash = cudf::detail::hash_combine(
504  hash_value_type{0},
505  type_dispatcher<dispatch_storage_type>(
506  _table.column(0).type(),
508  _table.column(0),
509  row_index));
510 
511  // Hashes an element in a column
512  auto hasher = [=](size_type column_index) {
513  return cudf::type_dispatcher<dispatch_storage_type>(
514  _table.column(column_index).type(),
516  _table.column(column_index),
517  row_index);
518  };
519 
520  // Hash each element and combine all the hash values together
521  return thrust::transform_reduce(
522  thrust::seq,
523  // note that this starts at 1 and not 0 now since we already hashed the first column
524  thrust::make_counting_iterator(1),
525  thrust::make_counting_iterator(_table.num_columns()),
526  hasher,
527  initial_hash,
528  [](hash_value_type lhs, hash_value_type rhs) {
529  return cudf::detail::hash_combine(lhs, rhs);
530  });
531  }
532 
533  private:
534  table_device_view _table;
535  Nullate _has_nulls;
536  uint32_t _seed{DEFAULT_HASH_SEED};
537 };
538 
539 } // namespace cudf
cudf::row_lexicographic_comparator
Computes whether one row is lexicographically less than another row.
Definition: row_operators.cuh:351
cudf::table_device_view
Definition: table_device_view.cuh:82
cudf::column_device_view
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
Definition: column_device_view.cuh:311
cudf::row_hasher
Computes the hash value of a row in the given table.
Definition: row_operators.cuh:488
cudf::row_lexicographic_comparator::operator()
bool operator()(size_type lhs_index, size_type rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less than the row at ...
Definition: row_operators.cuh:395
cudf::element_hasher_with_seed
Definition: row_operators.cuh:450
cudf::null_order
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:158
column_device_view.cuh
Column device view class definitions.
cudf::weak_ordering
weak_ordering
Result type of the element_relational_comparator function object.
Definition: row_operators.cuh:44
cudf::element_relational_comparator::element_relational_comparator
element_relational_comparator(Nullate has_nulls, column_device_view lhs, column_device_view rhs, null_order null_precedence)
Construct type-dispatched function object for performing a relational comparison between two elements...
Definition: row_operators.cuh:280
cudf::relational_compare
weak_ordering relational_compare(Element lhs, Element rhs)
A specialization for floating-point Element type relational comparison to derive the order of the ele...
Definition: row_operators.cuh:85
cudf::type_dispatcher
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
Definition: type_dispatcher.hpp:423
cudf::null_compare
auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Compare the nulls according to null order.
Definition: row_operators.cuh:106
cudf::null_order::AFTER
@ AFTER
NULL values ordered after all other values.
cudf::equality_compare
bool equality_compare(Element lhs, Element rhs)
A specialization for floating-point Element type to check if lhs is equivalent to rhs....
Definition: row_operators.cuh:142
cudf::detail::column_device_view_base::type
data_type type() const noexcept
Returns the element type.
Definition: column_device_view.cuh:147
cudf::element_relational_comparator
Performs a relational comparison between two elements in two columns.
Definition: row_operators.cuh:267
cudf::detail::column_device_view_base::is_null
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
Definition: column_device_view.cuh:227
cudf
cuDF interfaces
Definition: aggregation.hpp:34
cudf::element_hasher
Computes the hash value of an element in the given column.
Definition: row_operators.cuh:431
cudf::weak_ordering::LESS
@ LESS
Indicates a is less than (ordered before) b
table_device_view.cuh
Table device view class definitions.
CUDF_EXPECTS
#define CUDF_EXPECTS(cond, reason)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:62
cudf::row_equality_comparator
Definition: row_operators.cuh:231
cudf::element_equality_comparator::operator()
bool operator()(size_type lhs_element_index, size_type rhs_element_index) const noexcept
Compares the specified elements for equality.
Definition: row_operators.cuh:199
cudf::null_equality
null_equality
Definition: types.hpp:150
type_dispatcher.hpp
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
traits.hpp
cudf::element_equality_comparator::element_equality_comparator
element_equality_comparator(Nullate has_nulls, column_device_view lhs, column_device_view rhs, null_equality nulls_are_equal=null_equality::EQUAL)
Construct type-dispatched function object for comparing equality between two elements.
Definition: row_operators.cuh:182
sorting.hpp
Column APIs for sort and rank.
cudf::element_relational_comparator::operator()
weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index) const noexcept
Performs a relational comparison between the specified elements.
Definition: row_operators.cuh:305
cudf::row_lexicographic_comparator::row_lexicographic_comparator
row_lexicographic_comparator(Nullate has_nulls, table_device_view lhs, table_device_view rhs, order const *column_order=nullptr, null_order const *null_precedence=nullptr)
Construct a function object for performing a lexicographic comparison between the rows of two tables.
Definition: row_operators.cuh:372
cudf::column_device_view::element
T element(size_type element_index) const noexcept
Returns reference to element at the specified index.
Definition: column_device_view.cuh:348
cudf::element_equality_comparator
Performs an equality comparison between two elements in two columns.
Definition: row_operators.cuh:168
cudf::order
order
Indicates the order in which elements should be sorted.
Definition: types.hpp:117