experimental/row_operators.cuh
1 /*
2  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
20 #include <cudf/detail/iterator.cuh>
21 #include <cudf/detail/utilities/algorithm.cuh>
22 #include <cudf/detail/utilities/assert.cuh>
23 #include <cudf/hashing/detail/default_hash.cuh>
24 #include <cudf/hashing/detail/hashing.hpp>
25 #include <cudf/lists/detail/dremel.hpp>
26 #include <cudf/lists/list_device_view.cuh>
27 #include <cudf/lists/lists_column_device_view.cuh>
28 #include <cudf/sorting.hpp>
29 #include <cudf/structs/structs_column_device_view.cuh>
30 #include <cudf/table/row_operators.cuh>
35 
36 #include <cuda/std/tuple>
37 #include <cuda/std/utility>
38 #include <thrust/detail/use_default.h>
39 #include <thrust/equal.h>
40 #include <thrust/execution_policy.h>
41 #include <thrust/functional.h>
42 #include <thrust/iterator/counting_iterator.h>
43 #include <thrust/iterator/iterator_adaptor.h>
44 #include <thrust/iterator/iterator_categories.h>
45 #include <thrust/iterator/iterator_facade.h>
46 #include <thrust/iterator/transform_iterator.h>
47 #include <thrust/logical.h>
48 #include <thrust/swap.h>
49 #include <thrust/transform_reduce.h>
50 
51 #include <limits>
52 #include <memory>
53 #include <optional>
54 #include <type_traits>
55 #include <utility>
56 
57 namespace CUDF_EXPORT cudf {
58 
59 namespace experimental {
60 
76 template <cudf::type_id t>
79  using type = std::conditional_t<t == type_id::STRUCT or t == type_id::LIST, void, id_to_type<t>>;
80 };
81 
82 namespace row {
83 
84 enum class lhs_index_type : size_type {};
85 enum class rhs_index_type : size_type {};
86 
100 template <typename Index, typename Underlying = std::underlying_type_t<Index>>
101 struct strong_index_iterator : public thrust::iterator_facade<strong_index_iterator<Index>,
102  Index,
103  thrust::use_default,
104  thrust::random_access_traversal_tag,
105  Index,
106  Underlying> {
107  using super_t =
108  thrust::iterator_adaptor<strong_index_iterator<Index>, Index>;
109 
115  explicit constexpr strong_index_iterator(Underlying n) : begin{n} {}
116 
117  friend class thrust::iterator_core_access;
118 
119  private:
120  __device__ constexpr void increment() { ++begin; }
121  __device__ constexpr void decrement() { --begin; }
122 
123  __device__ constexpr void advance(Underlying n) { begin += n; }
124 
125  __device__ constexpr bool equal(strong_index_iterator<Index> const& other) const noexcept
126  {
127  return begin == other.begin;
128  }
129 
130  __device__ constexpr Index dereference() const noexcept { return static_cast<Index>(begin); }
131 
132  __device__ constexpr Underlying distance_to(
133  strong_index_iterator<Index> const& other) const noexcept
134  {
135  return other.begin - begin;
136  }
137 
138  Underlying begin{};
139 };
140 
144 using lhs_iterator = strong_index_iterator<lhs_index_type>;
145 
149 using rhs_iterator = strong_index_iterator<rhs_index_type>;
150 
151 namespace lexicographic {
152 
168  template <typename Element>
169  __device__ constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
170  {
171  return detail::compare_elements(lhs, rhs);
172  }
173 };
174 
188  template <typename Element, CUDF_ENABLE_IF(not std::is_floating_point_v<Element>)>
189  __device__ constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
190  {
191  return detail::compare_elements(lhs, rhs);
192  }
193 
201  template <typename Element, CUDF_ENABLE_IF(std::is_floating_point_v<Element>)>
202  __device__ constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
203  {
204  if (isnan(lhs)) {
205  return isnan(rhs) ? weak_ordering::EQUIVALENT : weak_ordering::GREATER;
206  } else if (isnan(rhs)) {
207  return weak_ordering::LESS;
208  }
209 
210  return detail::compare_elements(lhs, rhs);
211  }
212 };
213 
214 using optional_dremel_view = cuda::std::optional<detail::dremel_device_view const>;
215 
216 // The has_nested_columns template parameter of the device_row_comparator is
217 // necessary to help the compiler optimize our code. Without it, the list and
218 // struct view specializations are present in the code paths used for primitive
219 // types, and the compiler fails to inline this nearly as well resulting in a
220 // significant performance drop. As a result, there is some minor tension in
221 // the current design between the presence of this parameter and the way that
222 // the Dremel data is passed around, first as a
223 // std::optional<device_span<dremel_device_view>> in the
224 // preprocessed_table/device_row_comparator (which is always valid when
225 // has_nested_columns and is otherwise invalid) that is then unpacked to a
226 // cuda::std::optional<dremel_device_view> at the element_comparator level (which
227 // is always valid for a list column and otherwise invalid). We cannot use an
228 // additional template parameter for the element_comparator on a per-column
229 // basis because we cannot conditionally define dremel_device_view member
230 // variables without jumping through extra hoops with inheritance, so the
231 // cuda::std::optional<dremel_device_view> member must be an optional rather than
232 // a raw dremel_device_view.
263 template <bool has_nested_columns,
264  typename Nullate,
265  typename PhysicalElementComparator = sorting_physical_element_comparator>
267  public:
268  friend class self_comparator;
269  friend class two_table_comparator;
270 
290  device_row_comparator(Nullate check_nulls,
291  table_device_view lhs,
292  table_device_view rhs,
293  device_span<detail::dremel_device_view const> l_dremel_device_views,
294  device_span<detail::dremel_device_view const> r_dremel_device_views,
295  std::optional<device_span<int const>> depth = std::nullopt,
296  std::optional<device_span<order const>> column_order = std::nullopt,
297  std::optional<device_span<null_order const>> null_precedence = std::nullopt,
298  PhysicalElementComparator comparator = {}) noexcept
299  : _lhs{lhs},
300  _rhs{rhs},
301  _l_dremel(l_dremel_device_views),
302  _r_dremel(r_dremel_device_views),
303  _check_nulls{check_nulls},
304  _depth{depth},
305  _column_order{column_order},
306  _null_precedence{null_precedence},
307  _comparator{comparator}
308  {
309  }
310 
329  template <bool nested_disable = not has_nested_columns, CUDF_ENABLE_IF(nested_disable)>
331  Nullate check_nulls,
332  table_device_view lhs,
333  table_device_view rhs,
334  std::optional<device_span<order const>> column_order = std::nullopt,
335  std::optional<device_span<null_order const>> null_precedence = std::nullopt,
336  PhysicalElementComparator comparator = {}) noexcept
337  : _lhs{lhs},
338  _rhs{rhs},
339  _l_dremel{},
340  _r_dremel{},
341  _check_nulls{check_nulls},
342  _depth{},
343  _column_order{column_order},
344  _null_precedence{null_precedence},
345  _comparator{comparator}
346  {
347  }
348 
353  public:
370  __device__ element_comparator(Nullate check_nulls,
371  column_device_view lhs,
372  column_device_view rhs,
373  null_order null_precedence = null_order::BEFORE,
374  int depth = 0,
375  PhysicalElementComparator comparator = {},
376  optional_dremel_view l_dremel_device_view = {},
377  optional_dremel_view r_dremel_device_view = {})
378  : _lhs{lhs},
379  _rhs{rhs},
380  _check_nulls{check_nulls},
381  _null_precedence{null_precedence},
382  _depth{depth},
383  _l_dremel_device_view{l_dremel_device_view},
384  _r_dremel_device_view{r_dremel_device_view},
385  _comparator{comparator}
386  {
387  }
388 
397  template <typename Element,
398  CUDF_ENABLE_IF(cudf::is_relationally_comparable<Element, Element>())>
399  __device__ cuda::std::pair<weak_ordering, int> operator()(
400  size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
401  {
402  if (_check_nulls) {
403  bool const lhs_is_null{_lhs.is_null(lhs_element_index)};
404  bool const rhs_is_null{_rhs.is_null(rhs_element_index)};
405 
406  if (lhs_is_null or rhs_is_null) { // at least one is null
407  return cuda::std::pair(null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth);
408  }
409  }
410 
411  return cuda::std::pair(_comparator(_lhs.element<Element>(lhs_element_index),
412  _rhs.element<Element>(rhs_element_index)),
413  std::numeric_limits<int>::max());
414  }
415 
423  template <typename Element,
424  CUDF_ENABLE_IF(not cudf::is_relationally_comparable<Element, Element>() and
425  (not has_nested_columns or not cudf::is_nested<Element>()))>
426  __device__ cuda::std::pair<weak_ordering, int> operator()(size_type const,
427  size_type const) const noexcept
428  {
429  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
430  }
431 
440  template <typename Element,
441  CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::struct_view>)>
442  __device__ cuda::std::pair<weak_ordering, int> operator()(
443  size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
444  {
445  column_device_view lcol = _lhs;
446  column_device_view rcol = _rhs;
447  int depth = _depth;
448  while (lcol.type().id() == type_id::STRUCT) {
449  bool const lhs_is_null{lcol.is_null(lhs_element_index)};
450  bool const rhs_is_null{rcol.is_null(rhs_element_index)};
451 
452  if (lhs_is_null or rhs_is_null) { // at least one is null
453  weak_ordering state = null_compare(lhs_is_null, rhs_is_null, _null_precedence);
454  return cuda::std::pair(state, depth);
455  }
456 
457  if (lcol.num_child_columns() == 0) {
458  return cuda::std::pair(weak_ordering::EQUIVALENT, std::numeric_limits<int>::max());
459  }
460 
461  // Non-empty structs have been modified to only have 1 child when using this.
464  ++depth;
465  }
466 
467  return cudf::type_dispatcher<dispatch_void_if_nested>(
468  lcol.type(),
469  element_comparator{_check_nulls, lcol, rcol, _null_precedence, depth, _comparator},
470  lhs_element_index,
471  rhs_element_index);
472  }
473 
482  template <typename Element,
483  CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::list_view>)>
484  __device__ cuda::std::pair<weak_ordering, int> operator()(size_type lhs_element_index,
485  size_type rhs_element_index)
486  {
487  // only order top-NULLs according to null_order
488  auto const is_l_row_null = _lhs.is_null(lhs_element_index);
489  auto const is_r_row_null = _rhs.is_null(rhs_element_index);
490  if (is_l_row_null || is_r_row_null) {
491  return cuda::std::pair(null_compare(is_l_row_null, is_r_row_null, _null_precedence),
492  _depth);
493  }
494 
495  // These are all the values from the Dremel encoding.
496  auto const l_max_def_level = _l_dremel_device_view->max_def_level;
497  auto const r_max_def_level = _r_dremel_device_view->max_def_level;
498  auto const l_def_levels = _l_dremel_device_view->def_levels;
499  auto const r_def_levels = _r_dremel_device_view->def_levels;
500  auto const l_rep_levels = _l_dremel_device_view->rep_levels;
501  auto const r_rep_levels = _r_dremel_device_view->rep_levels;
502 
503  // Traverse the nested list hierarchy to get a column device view
504  // pointing to the underlying child data.
505  column_device_view lcol = _lhs.slice(lhs_element_index, 1);
506  column_device_view rcol = _rhs.slice(rhs_element_index, 1);
507 
508  while (lcol.type().id() == type_id::LIST) {
511  }
512 
513  // These start and end values indicate the start and end points of all
514  // the elements of the lists in the current list element
515  // (`[lhs|rhs]_element_index`) that we are comparing.
516  auto const l_offsets = _l_dremel_device_view->offsets;
517  auto const r_offsets = _r_dremel_device_view->offsets;
518  auto l_start = l_offsets[lhs_element_index];
519  auto l_end = l_offsets[lhs_element_index + 1];
520  auto r_start = r_offsets[rhs_element_index];
521  auto r_end = r_offsets[rhs_element_index + 1];
522 
523  // This comparator will be used to compare leaf (non-nested) data types.
524  auto comparator =
525  element_comparator{_check_nulls, lcol, rcol, _null_precedence, _depth, _comparator};
526 
527  // Loop over each element in the encoding. Note that this includes nulls
528  // and empty lists, so not every index corresponds to an actual element
529  // in the child column. The element_index is used to keep track of the current
530  // child element that we're actually comparing.
531  for (int l_dremel_index = l_start, r_dremel_index = r_start, element_index = 0;
532  l_dremel_index < l_end and r_dremel_index < r_end;
533  ++l_dremel_index, ++r_dremel_index) {
534  auto const l_rep_level = l_rep_levels[l_dremel_index];
535  auto const r_rep_level = r_rep_levels[r_dremel_index];
536 
537  // early exit for smaller sub-list
538  if (l_rep_level != r_rep_level) {
539  // the lower repetition level is a smaller sub-list
540  return l_rep_level < r_rep_level ? cuda::std::pair(weak_ordering::LESS, _depth)
541  : cuda::std::pair(weak_ordering::GREATER, _depth);
542  }
543 
544  // only compare if left and right are at same nesting level
545  auto const l_def_level = l_def_levels[l_dremel_index];
546  auto const r_def_level = r_def_levels[r_dremel_index];
547 
548  // either left or right are empty or NULLs of arbitrary nesting
549  if (l_def_level < l_max_def_level || r_def_level < r_max_def_level) {
550  // in the fully unraveled version of the list column, only the
551  // most nested NULLs and leafs are present
552  // In this rare condition that we get to the most nested NULL, we increment
553  // element_index because either both rows have a deeply nested NULL at the
554  // same position, and we'll "continue" in our iteration, or we will early
555  // exit if only one of the rows has a deeply nested NULL
556  if ((lcol.nullable() and l_def_levels[l_dremel_index] == l_max_def_level - 1) or
557  (rcol.nullable() and r_def_levels[r_dremel_index] == r_max_def_level - 1)) {
558  ++element_index;
559  }
560  if (l_def_level == r_def_level) { continue; }
561  // We require [] < [NULL] < [leaf] for nested nulls.
562  // The null_precedence only affects top level nulls.
563  return l_def_level < r_def_level ? cuda::std::pair(weak_ordering::LESS, _depth)
564  : cuda::std::pair(weak_ordering::GREATER, _depth);
565  }
566 
567  // finally, compare leaf to leaf
568  weak_ordering state{weak_ordering::EQUIVALENT};
569  int last_null_depth = _depth;
570  cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher<dispatch_void_if_nested>(
571  lcol.type(), comparator, element_index, element_index);
572  if (state != weak_ordering::EQUIVALENT) { return cuda::std::pair(state, _depth); }
573  ++element_index;
574  }
575 
576  // If we have reached this stage, we know that definition levels,
577  // repetition levels, and actual elements are identical in both list
578  // columns up to the `min(l_end - l_start, r_end - r_start)` element of
579  // the Dremel encoding. However, two lists can only compare equivalent if
580  // they are of the same length. Otherwise, the shorter of the two is less
581  // than the longer. This final check determines the appropriate resulting
582  // ordering by checking how many total elements each list is composed of.
583  return cuda::std::pair(detail::compare_elements(l_end - l_start, r_end - r_start), _depth);
584  }
585 
586  private:
587  column_device_view const _lhs;
588  column_device_view const _rhs;
589  Nullate const _check_nulls;
590  null_order const _null_precedence;
591  int const _depth;
592  optional_dremel_view _l_dremel_device_view;
593  optional_dremel_view _r_dremel_device_view;
594  PhysicalElementComparator const _comparator;
595  };
596 
597  public:
607  __device__ constexpr weak_ordering operator()(size_type const lhs_index,
608  size_type const rhs_index) const noexcept
609  {
610  int last_null_depth = std::numeric_limits<int>::max();
611  size_type list_column_index{-1};
612  for (size_type i = 0; i < _lhs.num_columns(); ++i) {
613  if (_lhs.column(i).type().id() == type_id::LIST) { ++list_column_index; }
614 
615  int const depth = _depth.has_value() ? (*_depth)[i] : 0;
616  if (depth > last_null_depth) { continue; }
617 
618  bool const ascending =
619  _column_order.has_value() ? (*_column_order)[i] == order::ASCENDING : true;
620 
621  null_order const null_precedence =
622  _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE;
623 
624  // TODO: At what point do we verify that the columns of lhs and rhs are
625  // all of the same types? I assume that it's already happened before
626  // here, otherwise the current code would be failing.
627  auto const [l_dremel_i, r_dremel_i] =
628  _lhs.column(i).type().id() == type_id::LIST
629  ? std::make_tuple(optional_dremel_view(_l_dremel[list_column_index]),
630  optional_dremel_view(_r_dremel[list_column_index]))
631  : std::make_tuple(optional_dremel_view{}, optional_dremel_view{});
632 
633  auto element_comp = element_comparator{_check_nulls,
634  _lhs.column(i),
635  _rhs.column(i),
636  null_precedence,
637  depth,
638  _comparator,
639  l_dremel_i,
640  r_dremel_i};
641 
642  weak_ordering state;
643  cuda::std::tie(state, last_null_depth) =
644  cudf::type_dispatcher(_lhs.column(i).type(), element_comp, lhs_index, rhs_index);
645 
646  if (state == weak_ordering::EQUIVALENT) { continue; }
647 
648  return ascending
649  ? state
650  : (state == weak_ordering::GREATER ? weak_ordering::LESS : weak_ordering::GREATER);
651  }
652  return weak_ordering::EQUIVALENT;
653  }
654 
655  private:
656  table_device_view const _lhs;
657  table_device_view const _rhs;
660  Nullate const _check_nulls;
661  std::optional<device_span<int const>> const _depth;
662  std::optional<device_span<order const>> const _column_order;
663  std::optional<device_span<null_order const>> const _null_precedence;
664  PhysicalElementComparator const _comparator;
665 }; // class device_row_comparator
666 
677 template <typename Comparator, weak_ordering... values>
678 struct weak_ordering_comparator_impl {
679  static_assert(not((weak_ordering::EQUIVALENT == values) && ...),
680  "weak_ordering_comparator should not be used for pure equality comparisons. The "
681  "`row_equality_comparator` should be used instead");
682 
683  template <typename LhsType, typename RhsType>
684  __device__ constexpr bool operator()(LhsType const lhs_index,
685  RhsType const rhs_index) const noexcept
686  {
687  weak_ordering const result = comparator(lhs_index, rhs_index);
688  return ((result == values) || ...);
689  }
690  Comparator const comparator;
691 };
692 
699 template <typename Comparator>
700 struct less_comparator : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS> {
706  less_comparator(Comparator const& comparator)
707  : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS>{comparator}
708  {
709  }
710 };
711 
719 template <typename Comparator>
721  : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS, weak_ordering::EQUIVALENT> {
727  less_equivalent_comparator(Comparator const& comparator)
728  : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS, weak_ordering::EQUIVALENT>{
729  comparator}
730  {
731  }
732 };
733 
741  std::invoke_result_t<decltype(table_device_view::create), table_view, rmm::cuda_stream_view>;
742 
765  static std::shared_ptr<preprocessed_table> create(table_view const& table,
766  host_span<order const> column_order,
767  host_span<null_order const> null_precedence,
768  rmm::cuda_stream_view stream);
769 
791  static std::pair<std::shared_ptr<preprocessed_table>, std::shared_ptr<preprocessed_table>> create(
792  table_view const& lhs,
793  table_view const& rhs,
794  host_span<order const> column_order,
795  host_span<null_order const> null_precedence,
796  rmm::cuda_stream_view stream);
797 
798  private:
799  friend class self_comparator;
800  friend class two_table_comparator;
801 
821  static std::shared_ptr<preprocessed_table> create(
822  table_view const& preprocessed_input,
823  std::vector<int>&& verticalized_col_depths,
824  std::vector<std::unique_ptr<column>>&& transformed_columns,
825  host_span<order const> column_order,
826  host_span<null_order const> null_precedence,
827  bool has_ranked_children,
828  rmm::cuda_stream_view stream);
829 
857  rmm::device_uvector<order>&& column_order,
858  rmm::device_uvector<null_order>&& null_precedence,
860  std::vector<detail::dremel_data>&& dremel_data,
862  std::vector<std::unique_ptr<column>>&& transformed_columns,
863  bool has_ranked_children);
864 
866  rmm::device_uvector<order>&& column_order,
867  rmm::device_uvector<null_order>&& null_precedence,
869  std::vector<std::unique_ptr<column>>&& transformed_columns,
870  bool has_ranked_children);
871 
877  operator table_device_view() { return *_t; }
878 
885  [[nodiscard]] std::optional<device_span<order const>> column_order() const
886  {
887  return _column_order.size() ? std::optional<device_span<order const>>(_column_order)
888  : std::nullopt;
889  }
890 
898  [[nodiscard]] std::optional<device_span<null_order const>> null_precedence() const
899  {
900  return _null_precedence.size() ? std::optional<device_span<null_order const>>(_null_precedence)
901  : std::nullopt;
902  }
903 
912  [[nodiscard]] std::optional<device_span<int const>> depths() const
913  {
914  return _depths.size() ? std::optional<device_span<int const>>(_depths) : std::nullopt;
915  }
916 
917  [[nodiscard]] device_span<detail::dremel_device_view const> dremel_device_views() const
918  {
919  if (_dremel_device_views.has_value()) {
920  return device_span<detail::dremel_device_view const>(*_dremel_device_views);
921  } else {
922  return {};
923  }
924  }
925 
926  template <typename PhysicalElementComparator>
927  void check_physical_element_comparator()
928  {
929  if constexpr (!std::is_same_v<PhysicalElementComparator, sorting_physical_element_comparator>) {
930  CUDF_EXPECTS(!_has_ranked_children,
931  "The input table has nested type children and they were transformed using a "
932  "different type of physical element comparator.");
933  }
934  }
935 
936  private:
937  table_device_view_owner const _t;
938  rmm::device_uvector<order> const _column_order;
939  rmm::device_uvector<null_order> const _null_precedence;
940  rmm::device_uvector<size_type> const _depths;
941 
942  // Dremel encoding of list columns used for the comparison algorithm
943  std::optional<std::vector<detail::dremel_data>> _dremel_data;
944  std::optional<rmm::device_uvector<detail::dremel_device_view>> _dremel_device_views;
945 
946  // Intermediate columns generated from transforming nested children columns into
947  // integers columns using `cudf::rank()`, need to be kept alive.
948  std::vector<std::unique_ptr<column>> _transformed_columns;
949 
950  // Flag to record if the input table was preprocessed to transform any nested children column(s)
951  // into integer column(s) using `cudf::rank`.
952  bool const _has_ranked_children;
953 };
954 
969  public:
985  host_span<order const> column_order = {},
986  host_span<null_order const> null_precedence = {},
988  : d_t{preprocessed_table::create(t, column_order, null_precedence, stream)}
989  {
990  }
991 
1001  self_comparator(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
1002 
1031  template <bool has_nested_columns,
1032  typename Nullate,
1033  typename PhysicalElementComparator = sorting_physical_element_comparator>
1034  auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1035  {
1036  d_t->check_physical_element_comparator<PhysicalElementComparator>();
1037 
1038  return less_comparator{
1039  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1040  nullate,
1041  *d_t,
1042  *d_t,
1043  d_t->dremel_device_views(),
1044  d_t->dremel_device_views(),
1045  d_t->depths(),
1046  d_t->column_order(),
1047  d_t->null_precedence(),
1048  comparator}};
1049  }
1050 
1052  template <bool has_nested_columns,
1053  typename Nullate,
1054  typename PhysicalElementComparator = sorting_physical_element_comparator>
1055  auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1056  {
1057  d_t->check_physical_element_comparator<PhysicalElementComparator>();
1058 
1059  return less_equivalent_comparator{
1060  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1061  nullate,
1062  *d_t,
1063  *d_t,
1064  d_t->dremel_device_views(),
1065  d_t->dremel_device_views(),
1066  d_t->depths(),
1067  d_t->column_order(),
1068  d_t->null_precedence(),
1069  comparator}};
1070  }
1071 
1072  private:
1073  std::shared_ptr<preprocessed_table> d_t;
1074 };
1075 
1076 // @cond
1077 template <typename Comparator>
1078 struct strong_index_comparator_adapter {
1079  strong_index_comparator_adapter(Comparator const& comparator) : comparator{comparator} {}
1080 
1081  __device__ constexpr weak_ordering operator()(lhs_index_type const lhs_index,
1082  rhs_index_type const rhs_index) const noexcept
1083  {
1084  return comparator(static_cast<cudf::size_type>(lhs_index),
1085  static_cast<cudf::size_type>(rhs_index));
1086  }
1087 
1088  __device__ constexpr weak_ordering operator()(rhs_index_type const rhs_index,
1089  lhs_index_type const lhs_index) const noexcept
1090  {
1091  auto const left_right_ordering =
1092  comparator(static_cast<cudf::size_type>(lhs_index), static_cast<cudf::size_type>(rhs_index));
1093 
1094  // Invert less/greater values to reflect right to left ordering
1095  if (left_right_ordering == weak_ordering::LESS) {
1096  return weak_ordering::GREATER;
1097  } else if (left_right_ordering == weak_ordering::GREATER) {
1098  return weak_ordering::LESS;
1099  }
1100  return weak_ordering::EQUIVALENT;
1101  }
1102 
1103  Comparator const comparator;
1104 };
1105 // @endcond
1106 
1122  public:
1142  table_view const& right,
1143  host_span<order const> column_order = {},
1144  host_span<null_order const> null_precedence = {},
1146 
1161  two_table_comparator(std::shared_ptr<preprocessed_table> left,
1162  std::shared_ptr<preprocessed_table> right)
1163  : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1164  {
1165  }
1166 
1203  template <bool has_nested_columns,
1204  typename Nullate,
1205  typename PhysicalElementComparator = sorting_physical_element_comparator>
1206  auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1207  {
1208  d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1209  d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1210 
1211  return less_comparator{strong_index_comparator_adapter{
1212  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1213  nullate,
1214  *d_left_table,
1215  *d_right_table,
1216  d_left_table->dremel_device_views(),
1217  d_right_table->dremel_device_views(),
1218  d_left_table->depths(),
1219  d_left_table->column_order(),
1220  d_left_table->null_precedence(),
1221  comparator}}};
1222  }
1223 
1225  template <bool has_nested_columns,
1226  typename Nullate,
1227  typename PhysicalElementComparator = sorting_physical_element_comparator>
1228  auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1229  {
1230  d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1231  d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1232 
1233  return less_equivalent_comparator{strong_index_comparator_adapter{
1234  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1235  nullate,
1236  *d_left_table,
1237  *d_right_table,
1238  d_left_table->dremel_device_views(),
1239  d_right_table->dremel_device_views(),
1240  d_left_table->depths(),
1241  d_left_table->column_order(),
1242  d_left_table->null_precedence(),
1243  comparator}}};
1244  }
1245 
1246  private:
1247  std::shared_ptr<preprocessed_table> d_left_table;
1248  std::shared_ptr<preprocessed_table> d_right_table;
1249 };
1250 
1251 } // namespace lexicographic
1252 
1253 namespace hash {
1254 class row_hasher;
1255 } // namespace hash
1256 
1257 namespace equality {
1258 
1274  template <typename Element>
1275  __device__ constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
1276  {
1277  return lhs == rhs;
1278  }
1279 };
1280 
1293  template <typename Element, CUDF_ENABLE_IF(not std::is_floating_point_v<Element>)>
1294  __device__ constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
1295  {
1296  return lhs == rhs;
1297  }
1298 
1308  template <typename Element, CUDF_ENABLE_IF(std::is_floating_point_v<Element>)>
1309  __device__ constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
1310  {
1311  return isnan(lhs) and isnan(rhs) ? true : lhs == rhs;
1312  }
1313 };
1314 
1336 template <bool has_nested_columns,
1337  typename Nullate,
1338  typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1340  friend class self_comparator;
1341  friend class two_table_comparator;
1342 
1343  public:
1352  __device__ constexpr bool operator()(size_type const lhs_index,
1353  size_type const rhs_index) const noexcept
1354  {
1355  auto equal_elements = [=](column_device_view l, column_device_view r) {
1356  return cudf::type_dispatcher(
1357  l.type(),
1358  element_comparator{check_nulls, l, r, nulls_are_equal, comparator},
1359  lhs_index,
1360  rhs_index);
1361  };
1362 
1363  return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements);
1364  }
1365 
1366  private:
1377  device_row_comparator(Nullate check_nulls,
1378  table_device_view lhs,
1379  table_device_view rhs,
1380  null_equality nulls_are_equal = null_equality::EQUAL,
1381  PhysicalEqualityComparator comparator = {}) noexcept
1382  : lhs{lhs},
1383  rhs{rhs},
1384  check_nulls{check_nulls},
1385  nulls_are_equal{nulls_are_equal},
1386  comparator{comparator}
1387  {
1388  }
1389 
1393  class element_comparator {
1394  public:
1407  __device__ element_comparator(Nullate check_nulls,
1408  column_device_view lhs,
1409  column_device_view rhs,
1410  null_equality nulls_are_equal = null_equality::EQUAL,
1411  PhysicalEqualityComparator comparator = {}) noexcept
1412  : lhs{lhs},
1413  rhs{rhs},
1414  check_nulls{check_nulls},
1415  nulls_are_equal{nulls_are_equal},
1416  comparator{comparator}
1417  {
1418  }
1419 
1428  template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1429  __device__ bool operator()(size_type const lhs_element_index,
1430  size_type const rhs_element_index) const noexcept
1431  {
1432  if (check_nulls) {
1433  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
1434  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
1435  if (lhs_is_null and rhs_is_null) {
1436  return nulls_are_equal == null_equality::EQUAL;
1437  } else if (lhs_is_null != rhs_is_null) {
1438  return false;
1439  }
1440  }
1441 
1442  return comparator(lhs.element<Element>(lhs_element_index),
1443  rhs.element<Element>(rhs_element_index));
1444  }
1445 
1446  template <typename Element,
1447  CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>() and
1448  (not has_nested_columns or not cudf::is_nested<Element>())),
1449  typename... Args>
1450  __device__ bool operator()(Args...)
1451  {
1452  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
1453  }
1454 
1455  template <typename Element, CUDF_ENABLE_IF(has_nested_columns and cudf::is_nested<Element>())>
1456  __device__ bool operator()(size_type const lhs_element_index,
1457  size_type const rhs_element_index) const noexcept
1458  {
1459  column_device_view lcol = lhs.slice(lhs_element_index, 1);
1460  column_device_view rcol = rhs.slice(rhs_element_index, 1);
1461  while (lcol.type().id() == type_id::STRUCT || lcol.type().id() == type_id::LIST) {
1462  if (check_nulls) {
1463  auto lvalid = detail::make_validity_iterator<true>(lcol);
1464  auto rvalid = detail::make_validity_iterator<true>(rcol);
1465  if (nulls_are_equal == null_equality::UNEQUAL) {
1466  if (thrust::any_of(
1467  thrust::seq, lvalid, lvalid + lcol.size(), thrust::logical_not<bool>()) or
1468  thrust::any_of(
1469  thrust::seq, rvalid, rvalid + rcol.size(), thrust::logical_not<bool>())) {
1470  return false;
1471  }
1472  } else {
1473  if (not thrust::equal(thrust::seq, lvalid, lvalid + lcol.size(), rvalid)) {
1474  return false;
1475  }
1476  }
1477  }
1478  if (lcol.type().id() == type_id::STRUCT) {
1479  if (lcol.num_child_columns() == 0) { return true; }
1480  // Non-empty structs are assumed to be decomposed and contain only one child
1481  lcol = detail::structs_column_device_view(lcol).get_sliced_child(0);
1482  rcol = detail::structs_column_device_view(rcol).get_sliced_child(0);
1483  } else if (lcol.type().id() == type_id::LIST) {
1484  auto l_list_col = detail::lists_column_device_view(lcol);
1485  auto r_list_col = detail::lists_column_device_view(rcol);
1486 
1487  auto lsizes = make_list_size_iterator(l_list_col);
1488  auto rsizes = make_list_size_iterator(r_list_col);
1489  if (not thrust::equal(thrust::seq, lsizes, lsizes + lcol.size(), rsizes)) {
1490  return false;
1491  }
1492 
1493  lcol = l_list_col.get_sliced_child();
1494  rcol = r_list_col.get_sliced_child();
1495  if (lcol.size() != rcol.size()) { return false; }
1496  }
1497  }
1498 
1499  auto comp = column_comparator{
1500  element_comparator{check_nulls, lcol, rcol, nulls_are_equal, comparator}, lcol.size()};
1501  return type_dispatcher<dispatch_void_if_nested>(lcol.type(), comp);
1502  }
1503 
1504  private:
1512  struct column_comparator {
1513  element_comparator const comp;
1514  size_type const size;
1515 
1521  template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1522  __device__ bool operator()() const noexcept
1523  {
1524  return thrust::all_of(thrust::seq,
1525  thrust::make_counting_iterator(0),
1526  thrust::make_counting_iterator(0) + size,
1527  [=](auto i) { return comp.template operator()<Element>(i, i); });
1528  }
1529 
1530  template <typename Element,
1531  CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>()),
1532  typename... Args>
1533  __device__ bool operator()(Args...) const noexcept
1534  {
1535  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
1536  }
1537  };
1538 
1539  column_device_view const lhs;
1540  column_device_view const rhs;
1541  Nullate const check_nulls;
1542  null_equality const nulls_are_equal;
1543  PhysicalEqualityComparator const comparator;
1544  };
1545 
1546  table_device_view const lhs;
1547  table_device_view const rhs;
1548  Nullate const check_nulls;
1549  null_equality const nulls_are_equal;
1550  PhysicalEqualityComparator const comparator;
1551 };
1552 
1570  static std::shared_ptr<preprocessed_table> create(table_view const& table,
1571  rmm::cuda_stream_view stream);
1572 
1573  private:
1574  friend class self_comparator;
1575  friend class two_table_comparator;
1576  friend class hash::row_hasher;
1577 
1578  using table_device_view_owner =
1579  std::invoke_result_t<decltype(table_device_view::create), table_view, rmm::cuda_stream_view>;
1580 
1581  preprocessed_table(table_device_view_owner&& table,
1582  std::vector<rmm::device_buffer>&& null_buffers,
1583  std::vector<std::unique_ptr<column>>&& tmp_columns)
1584  : _t(std::move(table)),
1585  _null_buffers(std::move(null_buffers)),
1586  _tmp_columns(std::move(tmp_columns))
1587  {
1588  }
1589 
1595  operator table_device_view() { return *_t; }
1596 
1597  table_device_view_owner _t;
1598  std::vector<rmm::device_buffer> _null_buffers;
1599  std::vector<std::unique_ptr<column>> _tmp_columns;
1600 };
1601 
1607  public:
1617  : d_t(preprocessed_table::create(t, stream))
1618  {
1619  }
1620 
1630  self_comparator(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
1631 
1657  template <bool has_nested_columns,
1658  typename Nullate,
1659  typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1660  auto equal_to(Nullate nullate = {},
1661  null_equality nulls_are_equal = null_equality::EQUAL,
1662  PhysicalEqualityComparator comparator = {}) const noexcept
1663  {
1664  return device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>{
1665  nullate, *d_t, *d_t, nulls_are_equal, comparator};
1666  }
1667 
1668  private:
1669  std::shared_ptr<preprocessed_table> d_t;
1670 };
1671 
1672 // @cond
1673 template <typename Comparator>
1674 struct strong_index_comparator_adapter {
1675  strong_index_comparator_adapter(Comparator const& comparator) : comparator{comparator} {}
1676 
1677  __device__ constexpr bool operator()(lhs_index_type const lhs_index,
1678  rhs_index_type const rhs_index) const noexcept
1679  {
1680  return comparator(static_cast<cudf::size_type>(lhs_index),
1681  static_cast<cudf::size_type>(rhs_index));
1682  }
1683 
1684  __device__ constexpr bool operator()(rhs_index_type const rhs_index,
1685  lhs_index_type const lhs_index) const noexcept
1686  {
1687  return this->operator()(lhs_index, rhs_index);
1688  }
1689 
1690  Comparator const comparator;
1691 };
1692 // @endcond
1693 
1708  public:
1722  table_view const& right,
1723  rmm::cuda_stream_view stream);
1724 
1735  two_table_comparator(std::shared_ptr<preprocessed_table> left,
1736  std::shared_ptr<preprocessed_table> right)
1737  : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1738  {
1739  }
1740 
1771  template <bool has_nested_columns,
1772  typename Nullate,
1773  typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1774  auto equal_to(Nullate nullate = {},
1775  null_equality nulls_are_equal = null_equality::EQUAL,
1776  PhysicalEqualityComparator comparator = {}) const noexcept
1777  {
1778  return strong_index_comparator_adapter{
1779  device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>(
1780  nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)};
1781  }
1782 
1783  private:
1784  std::shared_ptr<preprocessed_table> d_left_table;
1785  std::shared_ptr<preprocessed_table> d_right_table;
1786 };
1787 
1788 } // namespace equality
1789 
1790 namespace hash {
1791 
1798 template <template <typename> class hash_function, typename Nullate>
1800  public:
1808  __device__ element_hasher(
1809  Nullate nulls,
1810  uint32_t seed = DEFAULT_HASH_SEED,
1811  hash_value_type null_hash = std::numeric_limits<hash_value_type>::max()) noexcept
1812  : _check_nulls(nulls), _seed(seed), _null_hash(null_hash)
1813  {
1814  }
1815 
1824  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
1825  __device__ hash_value_type operator()(column_device_view const& col,
1826  size_type row_index) const noexcept
1827  {
1828  if (_check_nulls && col.is_null(row_index)) { return _null_hash; }
1829  return hash_function<T>{_seed}(col.element<T>(row_index));
1830  }
1831 
1840  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
1841  __device__ hash_value_type operator()(column_device_view const& col,
1842  size_type row_index) const noexcept
1843  {
1844  CUDF_UNREACHABLE("Unsupported type in hash.");
1845  }
1846 
1847  Nullate _check_nulls;
1848  uint32_t _seed;
1850 };
1851 
1858 template <template <typename> class hash_function, typename Nullate>
1860  friend class row_hasher;
1861 
1862  public:
1869  __device__ auto operator()(size_type row_index) const noexcept
1870  {
1871  auto it = thrust::make_transform_iterator(_table.begin(), [=](auto const& column) {
1872  return cudf::type_dispatcher<dispatch_storage_type>(
1873  column.type(),
1874  element_hasher_adapter<hash_function>{_check_nulls, _seed},
1875  column,
1876  row_index);
1877  });
1878 
1879  // Hash each element and combine all the hash values together
1880  return detail::accumulate(it, it + _table.num_columns(), _seed, [](auto hash, auto h) {
1881  return cudf::hashing::detail::hash_combine(hash, h);
1882  });
1883  }
1884 
1885  private:
1893  template <template <typename> class hash_fn>
1894  class element_hasher_adapter {
1895  static constexpr hash_value_type NULL_HASH = std::numeric_limits<hash_value_type>::max();
1896  static constexpr hash_value_type NON_NULL_HASH = 0;
1897 
1898  public:
1899  __device__ element_hasher_adapter(Nullate check_nulls, uint32_t seed) noexcept
1900  : _element_hasher(check_nulls, seed), _check_nulls(check_nulls)
1901  {
1902  }
1903 
1904  template <typename T, CUDF_ENABLE_IF(not cudf::is_nested<T>())>
1905  __device__ hash_value_type operator()(column_device_view const& col,
1906  size_type row_index) const noexcept
1907  {
1908  return _element_hasher.template operator()<T>(col, row_index);
1909  }
1910 
1911  template <typename T, CUDF_ENABLE_IF(cudf::is_nested<T>())>
1912  __device__ hash_value_type operator()(column_device_view const& col,
1913  size_type row_index) const noexcept
1914  {
1915  auto hash = hash_value_type{0};
1916  column_device_view curr_col = col.slice(row_index, 1);
1917  while (curr_col.type().id() == type_id::STRUCT || curr_col.type().id() == type_id::LIST) {
1918  if (_check_nulls) {
1919  auto validity_it = detail::make_validity_iterator<true>(curr_col);
1920  hash = detail::accumulate(
1921  validity_it, validity_it + curr_col.size(), hash, [](auto hash, auto is_valid) {
1922  return cudf::hashing::detail::hash_combine(hash,
1923  is_valid ? NON_NULL_HASH : NULL_HASH);
1924  });
1925  }
1926  if (curr_col.type().id() == type_id::STRUCT) {
1927  if (curr_col.num_child_columns() == 0) { return hash; }
1928  // Non-empty structs are assumed to be decomposed and contain only one child
1929  curr_col = detail::structs_column_device_view(curr_col).get_sliced_child(0);
1930  } else if (curr_col.type().id() == type_id::LIST) {
1931  auto list_col = detail::lists_column_device_view(curr_col);
1932  auto list_sizes = make_list_size_iterator(list_col);
1933  hash = detail::accumulate(
1934  list_sizes, list_sizes + list_col.size(), hash, [](auto hash, auto size) {
1935  return cudf::hashing::detail::hash_combine(hash, hash_fn<size_type>{}(size));
1936  });
1937  curr_col = list_col.get_sliced_child();
1938  }
1939  }
1940  for (int i = 0; i < curr_col.size(); ++i) {
1941  hash = cudf::hashing::detail::hash_combine(
1942  hash,
1943  type_dispatcher<dispatch_void_if_nested>(curr_col.type(), _element_hasher, curr_col, i));
1944  }
1945  return hash;
1946  }
1947 
1948  element_hasher<hash_fn, Nullate> const _element_hasher;
1949  Nullate const _check_nulls;
1950  };
1951 
1952  CUDF_HOST_DEVICE device_row_hasher(Nullate check_nulls,
1953  table_device_view t,
1954  uint32_t seed = DEFAULT_HASH_SEED) noexcept
1955  : _check_nulls{check_nulls}, _table{t}, _seed(seed)
1956  {
1957  }
1958 
1959  Nullate const _check_nulls;
1960  table_device_view const _table;
1961  uint32_t const _seed;
1962 };
1963 
1964 // Inject row::equality::preprocessed_table into the row::hash namespace
1965 // As a result, row::equality::preprocessed_table and row::hash::preprocessed table are the same
1966 // type and are interchangeable.
1967 using preprocessed_table = row::equality::preprocessed_table;
1968 
1973 class row_hasher {
1974  public:
1983  : d_t(preprocessed_table::create(t, stream))
1984  {
1985  }
1986 
1996  row_hasher(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
1997 
2010  template <template <typename> class hash_function = cudf::hashing::detail::default_hash,
2011  template <template <typename> class, typename>
2012  class DeviceRowHasher = device_row_hasher,
2013  typename Nullate>
2014  DeviceRowHasher<hash_function, Nullate> device_hasher(Nullate nullate = {},
2015  uint32_t seed = DEFAULT_HASH_SEED) const
2016  {
2017  return DeviceRowHasher<hash_function, Nullate>(nullate, *d_t, seed);
2018  }
2019 
2020  private:
2021  std::shared_ptr<preprocessed_table> d_t;
2022 };
2023 
2024 } // namespace hash
2025 
2026 } // namespace row
2027 
2028 } // namespace experimental
2029 } // namespace CUDF_EXPORT cudf
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
T element(size_type element_index) const noexcept
Returns reference to element at the specified index.
CUDF_HOST_DEVICE column_device_view slice(size_type offset, size_type size) const noexcept
Get a new column_device_view which is a slice of this column.
CUDF_HOST_DEVICE size_type num_child_columns() const noexcept
Returns the number of child columns.
const_iterator< T > begin() const
Return an iterator to the first element of the column.
const_iterator< T > end() const
Returns an iterator to the element following the last element of the column.
A container of nullable device data as a column of elements.
Definition: column.hpp:47
constexpr type_id id() const noexcept
Returns the type identifier.
Definition: types.hpp:287
CUDF_HOST_DEVICE data_type type() const noexcept
Returns the element type.
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
CUDF_HOST_DEVICE bool nullable() const noexcept
Indicates whether the column can contain null elements, i.e., if it has an allocated bitmask.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child() const
Fetches the child column of the underlying list column with offset and size applied.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child(size_type idx) const
Fetches the child column of the underlying struct column.
constexpr bool operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table is equal to the row at rhs_index in the rhs tabl...
Comparator for performing equality comparisons between two rows of the same table.
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing equality comparisons between two rows of the same table.
self_comparator(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows of the same table.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Get the comparison operator to use on the device.
An owning object that can be used to equality compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing equality comparisons between two rows from two tables.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows from two tables.
Computes the hash value of a row in the given table.
auto operator()(size_type row_index) const noexcept
Return the hash value of a row in the given table.
Computes the hash value of an element in the given column.
element_hasher(Nullate nulls, uint32_t seed=DEFAULT_HASH_SEED, hash_value_type null_hash=std::numeric_limits< hash_value_type >::max()) noexcept
Constructs an element_hasher object.
hash_value_type _null_hash
Hash value to use for null elements.
Computes the hash value of a row in the given table.
DeviceRowHasher< hash_function, Nullate > device_hasher(Nullate nullate={}, uint32_t seed=DEFAULT_HASH_SEED) const
Get the hash operator to use on the device.
row_hasher(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for hashing the rows of a table.
row_hasher(std::shared_ptr< preprocessed_table > t)
Construct an owning object for hashing the rows of a table from an existing preprocessed_table.
Performs a relational comparison between two elements in two columns.
cuda::std::pair< weak_ordering, int > operator()(size_type lhs_element_index, size_type rhs_element_index)
Compares two list-type columns.
cuda::std::pair< weak_ordering, int > operator()(size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
Performs a relational comparison between the specified elements.
cuda::std::pair< weak_ordering, int > operator()(size_type const, size_type const) const noexcept
Throws run-time error when columns types cannot be compared or if this class is instantiated with has...
element_comparator(Nullate check_nulls, column_device_view lhs, column_device_view rhs, null_order null_precedence=null_order::BEFORE, int depth=0, PhysicalElementComparator comparator={}, optional_dremel_view l_dremel_device_view={}, optional_dremel_view r_dremel_device_view={})
Construct type-dispatched function object for performing a relational comparison between two elements...
Computes the lexicographic comparison between 2 rows.
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, std::optional< device_span< order const >> column_order=std::nullopt, std::optional< device_span< null_order const >> null_precedence=std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables....
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, device_span< detail::dremel_device_view const > l_dremel_device_views, device_span< detail::dremel_device_view const > r_dremel_device_views, std::optional< device_span< int const >> depth=std::nullopt, std::optional< device_span< order const >> column_order=std::nullopt, std::optional< device_span< null_order const >> null_precedence=std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables.
constexpr weak_ordering operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less,...
An owning object that can be used to lexicographically compare two rows of the same table.
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
self_comparator(table_view const &t, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between two rows of the same tab...
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
An owning object that can be used to lexicographically compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between rows of two different ta...
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
Table device view that is usable in device memory.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
Column device view class definitions.
uint32_t hash_value_type
Type of hash value.
Definition: hashing.hpp:35
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
std::unique_ptr< cudf::column > is_valid(cudf::column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a column of type_id::BOOL8 elements where for every element in input true indicates the value...
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:178
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:159
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:151
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
#define CUDF_ENABLE_IF(...)
Convenience macro for SFINAE as an unnamed template parameter.
Definition: traits.hpp:50
cuDF interfaces
Definition: aggregation.hpp:35
weak_ordering
Result type of the element_relational_comparator function object.
@ EQUIVALENT
Indicates a is ordered neither before nor after b
auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Compare the nulls according to null order.
CUDF_HOST_DEVICE auto make_list_size_iterator(detail::lists_column_device_view const &c)
Makes an iterator that returns size of the list by row index.
Column APIs for sort and rank.
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:328
A map from cudf::type_id to cudf type that excludes LIST and STRUCT types.
std::conditional_t< t==type_id::STRUCT or t==type_id::LIST, void, id_to_type< t > > type
The type to dispatch to if the type is nested.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparison of non-floating point values.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparisons.
Preprocessed table for use with row equality comparison or row hashing.
static std::shared_ptr< preprocessed_table > create(table_view const &table, rmm::cuda_stream_view stream)
Factory to construct preprocessed_table for use with row equality comparison or row hashing.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS m...
less_comparator(Comparator const &comparator)
Constructs a less_comparator.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS o...
less_equivalent_comparator(Comparator const &comparator)
Constructs a less_equivalent_comparator.
Computes a weak ordering of two values with special sorting behavior.
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparisons.
Preprocessed table for use with lexicographical comparison.
std::invoke_result_t< decltype(table_device_view::create), table_view, rmm::cuda_stream_view > table_device_view_owner
Type of table device view owner for the preprocessed table.
static std::pair< std::shared_ptr< preprocessed_table >, std::shared_ptr< preprocessed_table > > create(table_view const &lhs, table_view const &rhs, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess tables for use with lexicographical comparison.
static std::shared_ptr< preprocessed_table > create(table_view const &table, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess table for use with lexicographical comparison.
Relational comparator functor that compares physical values rather than logical elements like lists,...
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparison of non-floating point values.
A counting iterator that uses strongly typed indices bound to tables.
thrust::iterator_adaptor< strong_index_iterator< Index >, Index > super_t
The base class.
constexpr strong_index_iterator(Underlying n)
Constructs a strong index iterator.
C++20 std::span with reduced feature set.
Definition: span.hpp:231
Indicates the presence of nulls at compile-time or runtime.
Table device view class definitions.
bool has_nested_columns(table_view const &table)
Determine if any nested columns exist in a given table.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:32