row_operators.cuh
1 /*
2  * Copyright (c) 2019-2021, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
20 #include <cudf/detail/utilities/assert.cuh>
21 #include <cudf/detail/utilities/hash_functions.cuh>
22 #include <cudf/sorting.hpp>
26 
27 #include <thrust/equal.h>
28 #include <thrust/swap.h>
29 #include <thrust/transform_reduce.h>
30 
31 #include <limits>
32 
33 namespace cudf {
34 
43 enum class weak_ordering {
44  LESS,
45  EQUIVALENT,
46  GREATER
47 };
48 
49 namespace detail {
58 template <typename Element>
59 __device__ weak_ordering compare_elements(Element lhs, Element rhs)
60 {
61  if (lhs < rhs) {
62  return weak_ordering::LESS;
63  } else if (rhs < lhs) {
65  }
66  return weak_ordering::EQUIVALENT;
67 }
68 } // namespace detail
69 
70 /*
71  * @brief A specialization for floating-point `Element` type relational comparison
72  * to derive the order of the elements with respect to `lhs`. Specialization is to
73  * handle `nan` in the order shown below.
74  * `[-Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN, null] (for null_order::AFTER)`
75  * `[null, -Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN] (for null_order::BEFORE)`
76  *
77  * @param[in] lhs first element
78  * @param[in] rhs second element
79  * @return weak_ordering Indicates the relationship between the elements in
80  * the `lhs` and `rhs` columns.
81  */
82 template <typename Element, std::enable_if_t<std::is_floating_point<Element>::value>* = nullptr>
83 __device__ weak_ordering relational_compare(Element lhs, Element rhs)
84 {
85  if (isnan(lhs) and isnan(rhs)) {
86  return weak_ordering::EQUIVALENT;
87  } else if (isnan(rhs)) {
88  return weak_ordering::LESS;
89  } else if (isnan(lhs)) {
91  }
92 
93  return detail::compare_elements(lhs, rhs);
94 }
95 
104 inline __device__ auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
105 {
106  if (lhs_is_null and rhs_is_null) { // null <? null
107  return weak_ordering::EQUIVALENT;
108  } else if (lhs_is_null) { // null <? x
109  return (null_precedence == null_order::BEFORE) ? weak_ordering::LESS : weak_ordering::GREATER;
110  } else if (rhs_is_null) { // x <? null
111  return (null_precedence == null_order::AFTER) ? weak_ordering::LESS : weak_ordering::GREATER;
112  }
113  return weak_ordering::EQUIVALENT;
114 }
115 
125 template <typename Element, std::enable_if_t<not std::is_floating_point<Element>::value>* = nullptr>
126 __device__ weak_ordering relational_compare(Element lhs, Element rhs)
127 {
128  return detail::compare_elements(lhs, rhs);
129 }
130 
139 template <typename Element, std::enable_if_t<std::is_floating_point<Element>::value>* = nullptr>
140 __device__ bool equality_compare(Element lhs, Element rhs)
141 {
142  if (isnan(lhs) and isnan(rhs)) { return true; }
143  return lhs == rhs;
144 }
145 
154 template <typename Element, std::enable_if_t<not std::is_floating_point<Element>::value>* = nullptr>
155 __device__ bool equality_compare(Element const lhs, Element const rhs)
156 {
157  return lhs == rhs;
158 }
159 
165 template <bool has_nulls = true>
167  public:
179  column_device_view rhs,
180  bool nulls_are_equal = true)
181  : lhs{lhs}, rhs{rhs}, nulls_are_equal{nulls_are_equal}
182  {
183  }
184 
192  template <typename Element,
193  std::enable_if_t<cudf::is_equality_comparable<Element, Element>()>* = nullptr>
194  __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index) const
195  noexcept
196  {
197  if (has_nulls) {
198  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
199  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
200  if (lhs_is_null and rhs_is_null) {
201  return nulls_are_equal;
202  } else if (lhs_is_null != rhs_is_null) {
203  return false;
204  }
205  }
206 
207  return equality_compare(lhs.element<Element>(lhs_element_index),
208  rhs.element<Element>(rhs_element_index));
209  }
210 
211  template <typename Element,
212  std::enable_if_t<not cudf::is_equality_comparable<Element, Element>()>* = nullptr>
213  __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index)
214  {
215  cudf_assert(false && "Attempted to compare elements of uncomparable types.");
216  return false;
217  }
218 
219  private:
220  column_device_view lhs;
221  column_device_view rhs;
222  bool nulls_are_equal;
223 };
224 
225 template <bool has_nulls = true>
227  public:
228  row_equality_comparator(table_device_view lhs, table_device_view rhs, bool nulls_are_equal = true)
229  : lhs{lhs}, rhs{rhs}, nulls_are_equal{nulls_are_equal}
230  {
231  CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Mismatched number of columns.");
232  }
233 
234  __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept
235  {
236  auto equal_elements = [=](column_device_view l, column_device_view r) {
237  return cudf::type_dispatcher(l.type(),
238  element_equality_comparator<has_nulls>{l, r, nulls_are_equal},
239  lhs_row_index,
240  rhs_row_index);
241  };
242 
243  return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements);
244  }
245 
246  private:
247  table_device_view lhs;
248  table_device_view rhs;
249  bool nulls_are_equal;
250 };
251 
257 template <bool has_nulls = true>
259  public:
272  column_device_view rhs,
273  null_order null_precedence)
274  : lhs{lhs}, rhs{rhs}, null_precedence{null_precedence}
275  {
276  }
277 
288  template <typename Element,
289  std::enable_if_t<cudf::is_relationally_comparable<Element, Element>()>* = nullptr>
290  __device__ weak_ordering operator()(size_type lhs_element_index,
291  size_type rhs_element_index) const noexcept
292  {
293  if (has_nulls) {
294  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
295  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
296 
297  if (lhs_is_null or rhs_is_null) { // atleast one is null
298  return null_compare(lhs_is_null, rhs_is_null, null_precedence);
299  }
300  }
301 
302  return relational_compare(lhs.element<Element>(lhs_element_index),
303  rhs.element<Element>(rhs_element_index));
304  }
305 
306  template <typename Element,
307  std::enable_if_t<not cudf::is_relationally_comparable<Element, Element>()>* = nullptr>
308  __device__ weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index)
309  {
310  cudf_assert(false && "Attempted to compare elements of uncomparable types.");
311  return weak_ordering::LESS;
312  }
313 
314  private:
315  column_device_view lhs;
316  column_device_view rhs;
317  null_order null_precedence;
318 };
319 
335 template <bool has_nulls = true>
337  public:
356  table_device_view rhs,
357  order const* column_order = nullptr,
358  null_order const* null_precedence = nullptr)
359  : _lhs{lhs}, _rhs{rhs}, _column_order{column_order}, _null_precedence{null_precedence}
360  {
361  CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns.");
362  CUDF_EXPECTS(detail::is_relationally_comparable(_lhs, _rhs),
363  "Attempted to compare elements of uncomparable types.");
364  }
365 
375  __device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept
376  {
377  for (size_type i = 0; i < _lhs.num_columns(); ++i) {
378  bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING);
379 
380  weak_ordering state{weak_ordering::EQUIVALENT};
381  null_order null_precedence =
382  _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i];
383 
384  auto comparator =
385  element_relational_comparator<has_nulls>{_lhs.column(i), _rhs.column(i), null_precedence};
386 
387  state = cudf::type_dispatcher(_lhs.column(i).type(), comparator, lhs_index, rhs_index);
388 
389  if (state == weak_ordering::EQUIVALENT) { continue; }
390 
391  return state == (ascending ? weak_ordering::LESS : weak_ordering::GREATER);
392  }
393  return false;
394  }
395 
396  private:
397  table_device_view _lhs;
398  table_device_view _rhs;
399  null_order const* _null_precedence{};
400  order const* _column_order{};
401 }; // class row_lexicographic_comparator
402 
409 template <template <typename> class hash_function, bool has_nulls = true>
411  public:
412  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
413  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
414  {
415  if (has_nulls && col.is_null(row_index)) { return std::numeric_limits<hash_value_type>::max(); }
416  return hash_function<T>{}(col.element<T>(row_index));
417  }
418 
419  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
420  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
421  {
422  cudf_assert(false && "Unsupported type in hash.");
423  return {};
424  }
425 };
426 
427 template <template <typename> class hash_function, bool has_nulls = true>
429  public:
430  element_hasher_with_seed() = default;
431  __device__ element_hasher_with_seed(uint32_t seed) : _seed{seed} {}
432  __device__ element_hasher_with_seed(uint32_t seed, hash_value_type null_hash)
433  : _seed{seed}, _null_hash(null_hash)
434  {
435  }
436 
437  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
438  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
439  {
440  if (has_nulls && col.is_null(row_index)) { return _null_hash; }
441  return hash_function<T>{_seed}(col.element<T>(row_index));
442  }
443 
444  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
445  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
446  {
447  cudf_assert(false && "Unsupported type in hash.");
448  return {};
449  }
450 
451  private:
452  uint32_t _seed{DEFAULT_HASH_SEED};
453  hash_value_type _null_hash{std::numeric_limits<hash_value_type>::max()};
454 };
455 
462 template <template <typename> class hash_function, bool has_nulls = true>
463 class row_hasher {
464  public:
465  row_hasher() = delete;
466  row_hasher(table_device_view t) : _table{t} {}
467  row_hasher(table_device_view t, uint32_t seed) : _table{t}, _seed(seed) {}
468 
469  __device__ auto operator()(size_type row_index) const
470  {
471  auto hash_combiner = [](hash_value_type lhs, hash_value_type rhs) {
472  return hash_function<hash_value_type>{}.hash_combine(lhs, rhs);
473  };
474 
475  // Hash the first column w/ the seed
476  auto const initial_hash =
477  hash_combiner(hash_value_type{0},
478  type_dispatcher(_table.column(0).type(),
480  _table.column(0),
481  row_index));
482 
483  // Hashes an element in a column
484  auto hasher = [=](size_type column_index) {
485  return cudf::type_dispatcher(_table.column(column_index).type(),
487  _table.column(column_index),
488  row_index);
489  };
490 
491  // Hash each element and combine all the hash values together
492  return thrust::transform_reduce(
493  thrust::seq,
494  // note that this starts at 1 and not 0 now since we already hashed the first column
495  thrust::make_counting_iterator(1),
496  thrust::make_counting_iterator(_table.num_columns()),
497  hasher,
498  initial_hash,
499  hash_combiner);
500  }
501 
502  private:
503  table_device_view _table;
504  uint32_t _seed{DEFAULT_HASH_SEED};
505 };
506 
514 template <template <typename> class hash_function, bool has_nulls = true>
516  public:
517  row_hasher_initial_values() = delete;
518  row_hasher_initial_values(table_device_view t, hash_value_type* initial_hash)
519  : _table{t}, _initial_hash(initial_hash)
520  {
521  }
522 
523  __device__ auto operator()(size_type row_index) const
524  {
525  auto hash_combiner = [](hash_value_type lhs, hash_value_type rhs) {
526  return hash_function<hash_value_type>{}.hash_combine(lhs, rhs);
527  };
528 
529  // Hashes an element in a column and combines with an initial value
530  auto hasher = [=](size_type column_index) {
531  auto hash_value = cudf::type_dispatcher(_table.column(column_index).type(),
533  _table.column(column_index),
534  row_index);
535 
536  return hash_combiner(_initial_hash[column_index], hash_value);
537  };
538 
539  // Hash each element and combine all the hash values together
540  return thrust::transform_reduce(thrust::seq,
541  thrust::make_counting_iterator(0),
542  thrust::make_counting_iterator(_table.num_columns()),
543  hasher,
544  hash_value_type{0},
545  hash_combiner);
546  }
547 
548  private:
549  table_device_view _table;
550  hash_value_type* _initial_hash;
551 };
552 
553 } // namespace cudf
cudf::row_hasher_initial_values
Computes the hash value of a row in the given table, combined with an initial hash value for each col...
Definition: row_operators.cuh:515
cudf::type_dispatcher
constexpr decltype(auto) CUDA_HOST_DEVICE_CALLABLE type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
Definition: type_dispatcher.hpp:414
cudf::row_lexicographic_comparator
Computes whether one row is lexicographically less than another row.
Definition: row_operators.cuh:336
cudf::null_compare
__device__ auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Compare the nulls according to null order.
Definition: row_operators.cuh:104
cudf::table_device_view
Definition: table_device_view.cuh:81
cudf::column_device_view
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
Definition: column_device_view.cuh:276
cudf::element_equality_comparator::element_equality_comparator
__host__ __device__ element_equality_comparator(column_device_view lhs, column_device_view rhs, bool nulls_are_equal=true)
Construct type-dispatched function object for comparing equality between two elements.
Definition: row_operators.cuh:178
cudf::row_hasher
Computes the hash value of a row in the given table.
Definition: row_operators.cuh:463
cudf::element_relational_comparator::element_relational_comparator
__host__ __device__ element_relational_comparator(column_device_view lhs, column_device_view rhs, null_order null_precedence)
Construct type-dispatched function object for performing a relational comparison between two elements...
Definition: row_operators.cuh:271
cudf::element_hasher_with_seed
Definition: row_operators.cuh:428
cudf::null_order
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:160
column_device_view.cuh
Column device view class definitons.
cudf::weak_ordering
weak_ordering
Result type of the element_relational_comparator function object.
Definition: row_operators.cuh:43
cudf::element_relational_comparator::operator()
__device__ weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index) const noexcept
Performs a relational comparison between the specified elements.
Definition: row_operators.cuh:290
cudf::null_order::AFTER
@ AFTER
NULL values ordered after all other values.
cudf::row_lexicographic_comparator::row_lexicographic_comparator
row_lexicographic_comparator(table_device_view lhs, table_device_view rhs, order const *column_order=nullptr, null_order const *null_precedence=nullptr)
Construct a function object for performing a lexicographic comparison between the rows of two tables.
Definition: row_operators.cuh:355
cudf::element_relational_comparator
Performs a relational comparison between two elements in two columns.
Definition: row_operators.cuh:258
cudf::element_equality_comparator::operator()
__device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index) const noexcept
Compares the specified elements for equality.
Definition: row_operators.cuh:194
cudf
cuDF interfaces
Definition: aggregation.hpp:34
cudf::detail::column_device_view_base::is_null
__device__ bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
Definition: column_device_view.cuh:193
cudf::element_hasher
Computes the hash value of an element in the given column.
Definition: row_operators.cuh:410
cudf::relational_compare
__device__ weak_ordering relational_compare(Element lhs, Element rhs)
A specialization for non-floating-point Element type relational comparison to derive the order of the...
Definition: row_operators.cuh:83
cudf::column_device_view::element
__device__ T element(size_type element_index) const noexcept
Returns reference to element at the specified index.
Definition: column_device_view.cuh:313
cudf::weak_ordering::LESS
@ LESS
Indicates a is less than (ordered before) b
table_device_view.cuh
Table device view class definitons.
CUDF_EXPECTS
#define CUDF_EXPECTS(cond, reason)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:62
cudf::row_equality_comparator
Definition: row_operators.cuh:226
cudf::detail::column_device_view_base::type
__host__ __device__ data_type type() const noexcept
Returns the element type.
Definition: column_device_view.cuh:116
cudf::equality_compare
__device__ bool equality_compare(Element lhs, Element rhs)
A specialization for floating-point Element type to check if lhs is equivalent to rhs....
Definition: row_operators.cuh:140
cudf::row_lexicographic_comparator::operator()
__device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less than the row at ...
Definition: row_operators.cuh:375
type_dispatcher.hpp
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
traits.hpp
sorting.hpp
Column APIs for sort and rank.
cudf::element_equality_comparator
Performs an equality comparison between two elements in two columns.
Definition: row_operators.cuh:166
cudf::order
order
Indicates the order in which elements should be sorted.
Definition: types.hpp:119