row_operators.cuh
1 /*
2  * Copyright (c) 2019-2020, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
20 #include <cudf/detail/utilities/hash_functions.cuh>
21 #include <cudf/detail/utilities/release_assert.cuh>
22 #include <cudf/sorting.hpp>
26 
27 #include <thrust/equal.h>
28 #include <thrust/swap.h>
29 #include <thrust/transform_reduce.h>
30 
31 namespace cudf {
32 
41 enum class weak_ordering {
42  LESS,
43  EQUIVALENT,
44  GREATER
45 };
46 
47 namespace detail {
56 template <typename Element>
57 __device__ weak_ordering compare_elements(Element lhs, Element rhs)
58 {
59  if (lhs < rhs) {
60  return weak_ordering::LESS;
61  } else if (rhs < lhs) {
63  }
64  return weak_ordering::EQUIVALENT;
65 }
66 } // namespace detail
67 
68 /*
69  * @brief A specialization for floating-point `Element` type relational comparison
70  * to derive the order of the elements with respect to `lhs`. Specialization is to
71  * handle `nan` in the order shown below.
72  * `[-Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN, null] (for null_order::AFTER)`
73  * `[null, -Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN] (for null_order::BEFORE)`
74  *
75  * @param[in] lhs first element
76  * @param[in] rhs second element
77  * @return weak_ordering Indicates the relationship between the elements in
78  * the `lhs` and `rhs` columns.
79  */
80 template <typename Element, std::enable_if_t<std::is_floating_point<Element>::value>* = nullptr>
81 __device__ weak_ordering relational_compare(Element lhs, Element rhs)
82 {
83  if (isnan(lhs) and isnan(rhs)) {
84  return weak_ordering::EQUIVALENT;
85  } else if (isnan(rhs)) {
86  return weak_ordering::LESS;
87  } else if (isnan(lhs)) {
89  }
90 
91  return detail::compare_elements(lhs, rhs);
92 }
93 
103 template <typename Element, std::enable_if_t<not std::is_floating_point<Element>::value>* = nullptr>
104 __device__ weak_ordering relational_compare(Element lhs, Element rhs)
105 {
106  return detail::compare_elements(lhs, rhs);
107 }
108 
117 template <typename Element, std::enable_if_t<std::is_floating_point<Element>::value>* = nullptr>
118 __device__ bool equality_compare(Element lhs, Element rhs)
119 {
120  if (isnan(lhs) and isnan(rhs)) { return true; }
121  return lhs == rhs;
122 }
123 
132 template <typename Element, std::enable_if_t<not std::is_floating_point<Element>::value>* = nullptr>
133 __device__ bool equality_compare(Element const lhs, Element const rhs)
134 {
135  return lhs == rhs;
136 }
137 
143 template <bool has_nulls = true>
145  public:
157  column_device_view rhs,
158  bool nulls_are_equal = true)
159  : lhs{lhs}, rhs{rhs}, nulls_are_equal{nulls_are_equal}
160  {
161  }
162 
170  template <typename Element,
171  std::enable_if_t<cudf::is_equality_comparable<Element, Element>()>* = nullptr>
172  __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index) const
173  noexcept
174  {
175  if (has_nulls) {
176  bool const lhs_is_null{lhs.nullable() and lhs.is_null(lhs_element_index)};
177  bool const rhs_is_null{rhs.nullable() and rhs.is_null(rhs_element_index)};
178  if (lhs_is_null and rhs_is_null) {
179  return nulls_are_equal;
180  } else if (lhs_is_null != rhs_is_null) {
181  return false;
182  }
183  }
184 
185  return equality_compare(lhs.element<Element>(lhs_element_index),
186  rhs.element<Element>(rhs_element_index));
187  }
188 
189  template <typename Element,
190  std::enable_if_t<not cudf::is_equality_comparable<Element, Element>()>* = nullptr>
191  __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index)
192  {
193  release_assert(false && "Attempted to compare elements of uncomparable types.");
194  return false;
195  }
196 
197  private:
198  column_device_view lhs;
199  column_device_view rhs;
200  bool nulls_are_equal;
201 };
202 
203 template <bool has_nulls = true>
205  public:
206  row_equality_comparator(table_device_view lhs, table_device_view rhs, bool nulls_are_equal = true)
207  : lhs{lhs}, rhs{rhs}, nulls_are_equal{nulls_are_equal}
208  {
209  CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Mismatched number of columns.");
210  }
211 
212  __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept
213  {
214  auto equal_elements = [=](column_device_view l, column_device_view r) {
215  return cudf::type_dispatcher(l.type(),
216  element_equality_comparator<has_nulls>{l, r, nulls_are_equal},
217  lhs_row_index,
218  rhs_row_index);
219  };
220 
221  return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements);
222  }
223 
224  private:
225  table_device_view lhs;
226  table_device_view rhs;
227  bool nulls_are_equal;
228 };
229 
235 template <bool has_nulls = true>
237  public:
250  column_device_view rhs,
251  null_order null_precedence)
252  : lhs{lhs}, rhs{rhs}, null_precedence{null_precedence}
253  {
254  }
255 
266  template <typename Element,
267  std::enable_if_t<cudf::is_relationally_comparable<Element, Element>()>* = nullptr>
268  __device__ weak_ordering operator()(size_type lhs_element_index,
269  size_type rhs_element_index) const noexcept
270  {
271  if (has_nulls) {
272  bool const lhs_is_null{lhs.nullable() and lhs.is_null(lhs_element_index)};
273  bool const rhs_is_null{rhs.nullable() and rhs.is_null(rhs_element_index)};
274 
275  if (lhs_is_null and rhs_is_null) { // null <? null
276  return weak_ordering::EQUIVALENT;
277  } else if (lhs_is_null) { // null <? x
278  return (null_precedence == null_order::BEFORE) ? weak_ordering::LESS
280  } else if (rhs_is_null) { // x <? null
281  return (null_precedence == null_order::AFTER) ? weak_ordering::LESS
283  }
284  }
285 
286  return relational_compare(lhs.element<Element>(lhs_element_index),
287  rhs.element<Element>(rhs_element_index));
288  }
289 
290  template <typename Element,
291  std::enable_if_t<not cudf::is_relationally_comparable<Element, Element>()>* = nullptr>
292  __device__ weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index)
293  {
294  release_assert(false && "Attempted to compare elements of uncomparable types.");
295  return weak_ordering::LESS;
296  }
297 
298  private:
299  column_device_view lhs;
300  column_device_view rhs;
301  null_order null_precedence;
302 };
303 
319 template <bool has_nulls = true>
321  public:
339  table_device_view rhs,
340  order const* column_order = nullptr,
341  null_order const* null_precedence = nullptr)
342  : _lhs{lhs}, _rhs{rhs}, _column_order{column_order}, _null_precedence{null_precedence}
343  {
344  // Add check for types to be the same.
345  CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns.");
346  }
347 
357  __device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept
358  {
359  for (size_type i = 0; i < _lhs.num_columns(); ++i) {
360  bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING);
361 
362  weak_ordering state{weak_ordering::EQUIVALENT};
363  null_order null_precedence =
364  _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i];
365 
366  auto comparator =
367  element_relational_comparator<has_nulls>{_lhs.column(i), _rhs.column(i), null_precedence};
368 
369  state = cudf::type_dispatcher(_lhs.column(i).type(), comparator, lhs_index, rhs_index);
370 
371  if (state == weak_ordering::EQUIVALENT) { continue; }
372 
373  return state == (ascending ? weak_ordering::LESS : weak_ordering::GREATER);
374  }
375  return false;
376  }
377 
378  private:
379  table_device_view _lhs;
380  table_device_view _rhs;
381  null_order const* _null_precedence{};
382  order const* _column_order{};
383 }; // class row_lexicographic_comparator
384 
391 template <template <typename> class hash_function, bool has_nulls = true>
393  public:
394  template <typename T>
395  __device__ inline hash_value_type operator()(column_device_view col, size_type row_index)
396  {
397  if (has_nulls && col.is_null(row_index)) { return std::numeric_limits<hash_value_type>::max(); }
398 
399  return hash_function<T>{}(col.element<T>(row_index));
400  }
401 };
402 
403 template <template <typename> class hash_function, bool has_nulls = true>
405  public:
406  __device__ element_hasher_with_seed()
407  : _seed{0}, _null_hash(std::numeric_limits<hash_value_type>::max())
408  {
409  }
410  __device__ element_hasher_with_seed(
411  uint32_t seed = 0, hash_value_type null_hash = std::numeric_limits<hash_value_type>::max())
412  : _seed{seed}, _null_hash(null_hash)
413  {
414  }
415  // seed, null_hash, byte endianness
416  template <typename T>
417  __device__ inline hash_value_type operator()(column_device_view col, size_type row_index)
418  {
419  if (has_nulls && col.is_null(row_index)) { return _null_hash; }
420 
421  return hash_function<T>{_seed}(col.element<T>(row_index));
422  }
423 
424  private:
425  uint32_t _seed;
426  hash_value_type _null_hash;
427 };
428 
435 template <template <typename> class hash_function, bool has_nulls = true>
436 class row_hasher {
437  public:
438  row_hasher() = delete;
439  row_hasher(table_device_view t) : _table{t} {}
440 
441  __device__ auto operator()(size_type row_index) const
442  {
443  auto hash_combiner = [](hash_value_type lhs, hash_value_type rhs) {
444  return hash_function<hash_value_type>{}.hash_combine(lhs, rhs);
445  };
446 
447  // Hashes an element in a column
448  auto hasher = [=](size_type column_index) {
449  return cudf::type_dispatcher(_table.column(column_index).type(),
451  _table.column(column_index),
452  row_index);
453  };
454 
455  // Hash each element and combine all the hash values together
456  return thrust::transform_reduce(thrust::seq,
457  thrust::make_counting_iterator(0),
458  thrust::make_counting_iterator(_table.num_columns()),
459  hasher,
460  hash_value_type{0},
461  hash_combiner);
462  }
463 
464  private:
465  table_device_view _table;
466 };
467 
475 template <template <typename> class hash_function, bool has_nulls = true>
477  public:
478  row_hasher_initial_values() = delete;
479  row_hasher_initial_values(table_device_view t, hash_value_type* initial_hash)
480  : _table{t}, _initial_hash(initial_hash)
481  {
482  }
483 
484  __device__ auto operator()(size_type row_index) const
485  {
486  auto hash_combiner = [](hash_value_type lhs, hash_value_type rhs) {
487  return hash_function<hash_value_type>{}.hash_combine(lhs, rhs);
488  };
489 
490  // Hashes an element in a column and combines with an initial value
491  auto hasher = [=](size_type column_index) {
492  auto hash_value = cudf::type_dispatcher(_table.column(column_index).type(),
494  _table.column(column_index),
495  row_index);
496 
497  return hash_combiner(_initial_hash[column_index], hash_value);
498  };
499 
500  // Hash each element and combine all the hash values together
501  return thrust::transform_reduce(thrust::seq,
502  thrust::make_counting_iterator(0),
503  thrust::make_counting_iterator(_table.num_columns()),
504  hasher,
505  hash_value_type{0},
506  hash_combiner);
507  }
508 
509  private:
510  table_device_view _table;
511  hash_value_type* _initial_hash;
512 };
513 
514 } // namespace cudf
cudf::row_hasher_initial_values
Computes the hash value of a row in the given table, combined with an initial hash value for each col...
Definition: row_operators.cuh:476
cudf::type_dispatcher
constexpr decltype(auto) CUDA_HOST_DEVICE_CALLABLE type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
Definition: type_dispatcher.hpp:401
cudf::row_lexicographic_comparator
Computes whether one row is lexicographically less than another row.
Definition: row_operators.cuh:320
cudf::table_device_view
Definition: table_device_view.cuh:81
cudf::column_device_view
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
Definition: column_device_view.cuh:254
cudf::element_equality_comparator::element_equality_comparator
__host__ __device__ element_equality_comparator(column_device_view lhs, column_device_view rhs, bool nulls_are_equal=true)
Construct type-dispatched function object for comparing equality between two elements.
Definition: row_operators.cuh:156
cudf::row_hasher
Computes the hash value of a row in the given table.
Definition: row_operators.cuh:436
cudf::column_device_view::element
__device__ T const element(size_type element_index) const noexcept
Returns reference to element at the specified index.
Definition: column_device_view.cuh:287
cudf::element_relational_comparator::element_relational_comparator
__host__ __device__ element_relational_comparator(column_device_view lhs, column_device_view rhs, null_order null_precedence)
Construct type-dispatched function object for performing a relational comparison between two elements...
Definition: row_operators.cuh:249
cudf::element_hasher_with_seed
Definition: row_operators.cuh:404
cudf::null_order
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:152
column_device_view.cuh
Column device view class definitons.
cudf::weak_ordering
weak_ordering
Result type of the element_relational_comparator function object.
Definition: row_operators.cuh:41
cudf::element_relational_comparator::operator()
__device__ weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index) const noexcept
Performs a relational comparison between the specified elements.
Definition: row_operators.cuh:268
cudf::null_order::AFTER
@ AFTER
NULL values ordered after all other values.
cudf::row_lexicographic_comparator::row_lexicographic_comparator
row_lexicographic_comparator(table_device_view lhs, table_device_view rhs, order const *column_order=nullptr, null_order const *null_precedence=nullptr)
Construct a function object for performing a lexicographic comparison between the rows of two tables.
Definition: row_operators.cuh:338
cudf::element_relational_comparator
Performs a relational comparison between two elements in two columns.
Definition: row_operators.cuh:236
cudf::element_equality_comparator::operator()
__device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index) const noexcept
Compares the specified elements for equality.
Definition: row_operators.cuh:172
cudf
cuDF interfaces
Definition: aggregation.hpp:34
cudf::detail::column_device_view_base::is_null
__device__ bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
Definition: column_device_view.cuh:185
cudf::element_hasher
Computes the hash value of an element in the given column.
Definition: row_operators.cuh:392
cudf::relational_compare
__device__ weak_ordering relational_compare(Element lhs, Element rhs)
A specialization for non-floating-point Element type relational comparison to derive the order of the...
Definition: row_operators.cuh:81
cudf::weak_ordering::LESS
@ LESS
Indicates a is less than (ordered before) b
table_device_view.cuh
Table device view class definitons.
CUDF_EXPECTS
#define CUDF_EXPECTS(cond, reason)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:62
cudf::row_equality_comparator
Definition: row_operators.cuh:204
cudf::detail::column_device_view_base::type
__host__ __device__ data_type type() const noexcept
Returns the element type.
Definition: column_device_view.cuh:108
cudf::equality_compare
__device__ bool equality_compare(Element lhs, Element rhs)
A specialization for floating-point Element type to check if lhs is equivalent to rhs....
Definition: row_operators.cuh:118
cudf::row_lexicographic_comparator::operator()
__device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less than the row at ...
Definition: row_operators.cuh:357
type_dispatcher.hpp
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
traits.hpp
sorting.hpp
Column APIs for sort and rank.
cudf::element_equality_comparator
Performs an equality comparison between two elements in two columns.
Definition: row_operators.cuh:144
cudf::order
order
Indicates the order in which elements should be sorted.
Definition: types.hpp:120