experimental/row_operators.cuh
1 /*
2  * Copyright (c) 2022-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
20 #include <cudf/detail/iterator.cuh>
21 #include <cudf/detail/utilities/algorithm.cuh>
22 #include <cudf/detail/utilities/assert.cuh>
23 #include <cudf/hashing/detail/default_hash.cuh>
24 #include <cudf/hashing/detail/hashing.hpp>
25 #include <cudf/lists/detail/dremel.hpp>
26 #include <cudf/lists/list_device_view.cuh>
27 #include <cudf/lists/lists_column_device_view.cuh>
28 #include <cudf/sorting.hpp>
29 #include <cudf/structs/structs_column_device_view.cuh>
30 #include <cudf/table/row_operators.cuh>
35 
36 #include <cuda/std/functional>
37 #include <cuda/std/limits>
38 #include <cuda/std/optional>
39 #include <cuda/std/tuple>
40 #include <cuda/std/utility>
41 #include <thrust/detail/use_default.h>
42 #include <thrust/equal.h>
43 #include <thrust/execution_policy.h>
44 #include <thrust/functional.h>
45 #include <thrust/iterator/counting_iterator.h>
46 #include <thrust/iterator/iterator_adaptor.h>
47 #include <thrust/iterator/iterator_categories.h>
48 #include <thrust/iterator/iterator_facade.h>
49 #include <thrust/iterator/transform_iterator.h>
50 #include <thrust/logical.h>
51 #include <thrust/swap.h>
52 #include <thrust/transform_reduce.h>
53 
54 #include <memory>
55 #include <type_traits>
56 
57 namespace CUDF_EXPORT cudf {
58 
59 namespace row::primitive {
60 class row_equality_comparator; // Forward declaration
61 
62 template <template <typename> class Hash>
63 class row_hasher; // Forward declaration
64 } // namespace row::primitive
65 
66 namespace experimental {
67 
83 template <cudf::type_id t>
86  using type = std::conditional_t<t == type_id::STRUCT or t == type_id::LIST, void, id_to_type<t>>;
87 };
88 
89 namespace row {
90 
91 enum class lhs_index_type : size_type {};
92 enum class rhs_index_type : size_type {};
93 
107 template <typename Index, typename Underlying = std::underlying_type_t<Index>>
108 struct strong_index_iterator : public thrust::iterator_facade<strong_index_iterator<Index>,
109  Index,
110  thrust::use_default,
111  thrust::random_access_traversal_tag,
112  Index,
113  Underlying> {
114  using super_t =
115  thrust::iterator_adaptor<strong_index_iterator<Index>, Index>;
116 
122  explicit constexpr strong_index_iterator(Underlying n) : begin{n} {}
123 
124  friend class thrust::iterator_core_access;
125 
126  private:
127  __device__ constexpr void increment() { ++begin; }
128  __device__ constexpr void decrement() { --begin; }
129 
130  __device__ constexpr void advance(Underlying n) { begin += n; }
131 
132  __device__ constexpr bool equal(strong_index_iterator<Index> const& other) const noexcept
133  {
134  return begin == other.begin;
135  }
136 
137  __device__ constexpr Index dereference() const noexcept { return static_cast<Index>(begin); }
138 
139  __device__ constexpr Underlying distance_to(
140  strong_index_iterator<Index> const& other) const noexcept
141  {
142  return other.begin - begin;
143  }
144 
145  Underlying begin{};
146 };
147 
151 using lhs_iterator = strong_index_iterator<lhs_index_type>;
152 
156 using rhs_iterator = strong_index_iterator<rhs_index_type>;
157 
158 namespace lexicographic {
159 
175  template <typename Element>
176  __device__ constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
177  {
178  return detail::compare_elements(lhs, rhs);
179  }
180 };
181 
195  template <typename Element, CUDF_ENABLE_IF(not std::is_floating_point_v<Element>)>
196  __device__ constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
197  {
198  return detail::compare_elements(lhs, rhs);
199  }
200 
208  template <typename Element, CUDF_ENABLE_IF(std::is_floating_point_v<Element>)>
209  __device__ constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
210  {
211  if (isnan(lhs)) {
212  return isnan(rhs) ? weak_ordering::EQUIVALENT : weak_ordering::GREATER;
213  } else if (isnan(rhs)) {
214  return weak_ordering::LESS;
215  }
216 
217  return detail::compare_elements(lhs, rhs);
218  }
219 };
220 
221 using optional_dremel_view = cuda::std::optional<detail::dremel_device_view const>;
222 
223 // The has_nested_columns template parameter of the device_row_comparator is
224 // necessary to help the compiler optimize our code. Without it, the list and
225 // struct view specializations are present in the code paths used for primitive
226 // types, and the compiler fails to inline this nearly as well resulting in a
227 // significant performance drop. As a result, there is some minor tension in
228 // the current design between the presence of this parameter and the way that
229 // the Dremel data is passed around, first as a
230 // std::optional<device_span<dremel_device_view>> in the
231 // preprocessed_table/device_row_comparator (which is always valid when
232 // has_nested_columns and is otherwise invalid) that is then unpacked to a
233 // cuda::std::optional<dremel_device_view> at the element_comparator level (which
234 // is always valid for a list column and otherwise invalid). We cannot use an
235 // additional template parameter for the element_comparator on a per-column
236 // basis because we cannot conditionally define dremel_device_view member
237 // variables without jumping through extra hoops with inheritance, so the
238 // cuda::std::optional<dremel_device_view> member must be an optional rather than
239 // a raw dremel_device_view.
270 template <bool has_nested_columns,
271  typename Nullate,
272  typename PhysicalElementComparator = sorting_physical_element_comparator>
274  public:
275  friend class self_comparator;
276  friend class two_table_comparator;
277 
298  Nullate check_nulls,
299  table_device_view lhs,
300  table_device_view rhs,
301  device_span<detail::dremel_device_view const> l_dremel_device_views,
302  device_span<detail::dremel_device_view const> r_dremel_device_views,
303  cuda::std::optional<device_span<int const>> depth = cuda::std::nullopt,
304  cuda::std::optional<device_span<order const>> column_order = cuda::std::nullopt,
305  cuda::std::optional<device_span<null_order const>> null_precedence = cuda::std::nullopt,
306  PhysicalElementComparator comparator = {}) noexcept
307  : _lhs{lhs},
308  _rhs{rhs},
309  _l_dremel(l_dremel_device_views),
310  _r_dremel(r_dremel_device_views),
311  _check_nulls{check_nulls},
312  _depth{depth},
313  _column_order{column_order},
314  _null_precedence{null_precedence},
315  _comparator{comparator}
316  {
317  }
318 
337  template <bool nested_disable = not has_nested_columns, CUDF_ENABLE_IF(nested_disable)>
339  Nullate check_nulls,
340  table_device_view lhs,
341  table_device_view rhs,
342  cuda::std::optional<device_span<order const>> column_order = cuda::std::nullopt,
343  cuda::std::optional<device_span<null_order const>> null_precedence = cuda::std::nullopt,
344  PhysicalElementComparator comparator = {}) noexcept
345  : _lhs{lhs},
346  _rhs{rhs},
347  _l_dremel{},
348  _r_dremel{},
349  _check_nulls{check_nulls},
350  _depth{},
351  _column_order{column_order},
352  _null_precedence{null_precedence},
353  _comparator{comparator}
354  {
355  }
356 
361  public:
378  __device__ element_comparator(Nullate check_nulls,
379  column_device_view lhs,
380  column_device_view rhs,
381  null_order null_precedence = null_order::BEFORE,
382  int depth = 0,
383  PhysicalElementComparator comparator = {},
384  optional_dremel_view l_dremel_device_view = {},
385  optional_dremel_view r_dremel_device_view = {})
386  : _lhs{lhs},
387  _rhs{rhs},
388  _check_nulls{check_nulls},
389  _null_precedence{null_precedence},
390  _depth{depth},
391  _l_dremel_device_view{l_dremel_device_view},
392  _r_dremel_device_view{r_dremel_device_view},
393  _comparator{comparator}
394  {
395  }
396 
405  template <typename Element,
406  CUDF_ENABLE_IF(cudf::is_relationally_comparable<Element, Element>())>
407  __device__ cuda::std::pair<weak_ordering, int> operator()(
408  size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
409  {
410  if (_check_nulls) {
411  bool const lhs_is_null{_lhs.is_null(lhs_element_index)};
412  bool const rhs_is_null{_rhs.is_null(rhs_element_index)};
413 
414  if (lhs_is_null or rhs_is_null) { // at least one is null
415  return cuda::std::pair(null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth);
416  }
417  }
418 
419  return cuda::std::pair(_comparator(_lhs.element<Element>(lhs_element_index),
420  _rhs.element<Element>(rhs_element_index)),
421  cuda::std::numeric_limits<int>::max());
422  }
423 
431  template <typename Element,
432  CUDF_ENABLE_IF(not cudf::is_relationally_comparable<Element, Element>() and
433  (not has_nested_columns or not cudf::is_nested<Element>()))>
434  __device__ cuda::std::pair<weak_ordering, int> operator()(size_type const,
435  size_type const) const noexcept
436  {
437  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
438  }
439 
448  template <typename Element,
449  CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::struct_view>)>
450  __device__ cuda::std::pair<weak_ordering, int> operator()(
451  size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
452  {
453  column_device_view lcol = _lhs;
454  column_device_view rcol = _rhs;
455  int depth = _depth;
456  while (lcol.type().id() == type_id::STRUCT) {
457  bool const lhs_is_null{lcol.is_null(lhs_element_index)};
458  bool const rhs_is_null{rcol.is_null(rhs_element_index)};
459 
460  if (lhs_is_null or rhs_is_null) { // at least one is null
461  weak_ordering state = null_compare(lhs_is_null, rhs_is_null, _null_precedence);
462  return cuda::std::pair(state, depth);
463  }
464 
465  if (lcol.num_child_columns() == 0) {
466  return cuda::std::pair(weak_ordering::EQUIVALENT, cuda::std::numeric_limits<int>::max());
467  }
468 
469  // Non-empty structs have been modified to only have 1 child when using this.
472  ++depth;
473  }
474 
475  return cudf::type_dispatcher<dispatch_void_if_nested>(
476  lcol.type(),
477  element_comparator{_check_nulls, lcol, rcol, _null_precedence, depth, _comparator},
478  lhs_element_index,
479  rhs_element_index);
480  }
481 
490  template <typename Element,
491  CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::list_view>)>
492  __device__ cuda::std::pair<weak_ordering, int> operator()(size_type lhs_element_index,
493  size_type rhs_element_index)
494  {
495  // only order top-NULLs according to null_order
496  auto const is_l_row_null = _lhs.is_null(lhs_element_index);
497  auto const is_r_row_null = _rhs.is_null(rhs_element_index);
498  if (is_l_row_null || is_r_row_null) {
499  return cuda::std::pair(null_compare(is_l_row_null, is_r_row_null, _null_precedence),
500  _depth);
501  }
502 
503  // These are all the values from the Dremel encoding.
504  auto const l_max_def_level = _l_dremel_device_view->max_def_level;
505  auto const r_max_def_level = _r_dremel_device_view->max_def_level;
506  auto const l_def_levels = _l_dremel_device_view->def_levels;
507  auto const r_def_levels = _r_dremel_device_view->def_levels;
508  auto const l_rep_levels = _l_dremel_device_view->rep_levels;
509  auto const r_rep_levels = _r_dremel_device_view->rep_levels;
510 
511  // Traverse the nested list hierarchy to get a column device view
512  // pointing to the underlying child data.
513  column_device_view lcol = _lhs.slice(lhs_element_index, 1);
514  column_device_view rcol = _rhs.slice(rhs_element_index, 1);
515 
516  while (lcol.type().id() == type_id::LIST) {
519  }
520 
521  // These start and end values indicate the start and end points of all
522  // the elements of the lists in the current list element
523  // (`[lhs|rhs]_element_index`) that we are comparing.
524  auto const l_offsets = _l_dremel_device_view->offsets;
525  auto const r_offsets = _r_dremel_device_view->offsets;
526  auto l_start = l_offsets[lhs_element_index];
527  auto l_end = l_offsets[lhs_element_index + 1];
528  auto r_start = r_offsets[rhs_element_index];
529  auto r_end = r_offsets[rhs_element_index + 1];
530 
531  // This comparator will be used to compare leaf (non-nested) data types.
532  auto comparator =
533  element_comparator{_check_nulls, lcol, rcol, _null_precedence, _depth, _comparator};
534 
535  // Loop over each element in the encoding. Note that this includes nulls
536  // and empty lists, so not every index corresponds to an actual element
537  // in the child column. The element_index is used to keep track of the current
538  // child element that we're actually comparing.
539  for (int l_dremel_index = l_start, r_dremel_index = r_start, element_index = 0;
540  l_dremel_index < l_end and r_dremel_index < r_end;
541  ++l_dremel_index, ++r_dremel_index) {
542  auto const l_rep_level = l_rep_levels[l_dremel_index];
543  auto const r_rep_level = r_rep_levels[r_dremel_index];
544 
545  // early exit for smaller sub-list
546  if (l_rep_level != r_rep_level) {
547  // the lower repetition level is a smaller sub-list
548  return l_rep_level < r_rep_level ? cuda::std::pair(weak_ordering::LESS, _depth)
549  : cuda::std::pair(weak_ordering::GREATER, _depth);
550  }
551 
552  // only compare if left and right are at same nesting level
553  auto const l_def_level = l_def_levels[l_dremel_index];
554  auto const r_def_level = r_def_levels[r_dremel_index];
555 
556  // either left or right are empty or NULLs of arbitrary nesting
557  if (l_def_level < l_max_def_level || r_def_level < r_max_def_level) {
558  // in the fully unraveled version of the list column, only the
559  // most nested NULLs and leafs are present
560  // In this rare condition that we get to the most nested NULL, we increment
561  // element_index because either both rows have a deeply nested NULL at the
562  // same position, and we'll "continue" in our iteration, or we will early
563  // exit if only one of the rows has a deeply nested NULL
564  if ((lcol.nullable() and l_def_levels[l_dremel_index] == l_max_def_level - 1) or
565  (rcol.nullable() and r_def_levels[r_dremel_index] == r_max_def_level - 1)) {
566  ++element_index;
567  }
568  if (l_def_level == r_def_level) { continue; }
569  // We require [] < [NULL] < [leaf] for nested nulls.
570  // The null_precedence only affects top level nulls.
571  return l_def_level < r_def_level ? cuda::std::pair(weak_ordering::LESS, _depth)
572  : cuda::std::pair(weak_ordering::GREATER, _depth);
573  }
574 
575  // finally, compare leaf to leaf
576  weak_ordering state{weak_ordering::EQUIVALENT};
577  int last_null_depth = _depth;
578  cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher<dispatch_void_if_nested>(
579  lcol.type(), comparator, element_index, element_index);
580  if (state != weak_ordering::EQUIVALENT) { return cuda::std::pair(state, _depth); }
581  ++element_index;
582  }
583 
584  // If we have reached this stage, we know that definition levels,
585  // repetition levels, and actual elements are identical in both list
586  // columns up to the `min(l_end - l_start, r_end - r_start)` element of
587  // the Dremel encoding. However, two lists can only compare equivalent if
588  // they are of the same length. Otherwise, the shorter of the two is less
589  // than the longer. This final check determines the appropriate resulting
590  // ordering by checking how many total elements each list is composed of.
591  return cuda::std::pair(detail::compare_elements(l_end - l_start, r_end - r_start), _depth);
592  }
593 
594  private:
595  column_device_view const _lhs;
596  column_device_view const _rhs;
597  Nullate const _check_nulls;
598  null_order const _null_precedence;
599  int const _depth;
600  optional_dremel_view _l_dremel_device_view;
601  optional_dremel_view _r_dremel_device_view;
602  PhysicalElementComparator const _comparator;
603  };
604 
605  public:
615  __device__ constexpr weak_ordering operator()(size_type const lhs_index,
616  size_type const rhs_index) const noexcept
617  {
618  int last_null_depth = cuda::std::numeric_limits<int>::max();
619  size_type list_column_index{-1};
620  for (size_type i = 0; i < _lhs.num_columns(); ++i) {
621  if (_lhs.column(i).type().id() == type_id::LIST) { ++list_column_index; }
622 
623  int const depth = _depth.has_value() ? (*_depth)[i] : 0;
624  if (depth > last_null_depth) { continue; }
625 
626  bool const ascending =
627  _column_order.has_value() ? (*_column_order)[i] == order::ASCENDING : true;
628 
629  null_order const null_precedence =
630  _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE;
631 
632  // TODO: At what point do we verify that the columns of lhs and rhs are
633  // all of the same types? I assume that it's already happened before
634  // here, otherwise the current code would be failing.
635  auto const [l_dremel_i, r_dremel_i] =
636  _lhs.column(i).type().id() == type_id::LIST
637  ? cuda::std::make_tuple(optional_dremel_view(_l_dremel[list_column_index]),
638  optional_dremel_view(_r_dremel[list_column_index]))
639  : cuda::std::make_tuple(optional_dremel_view{}, optional_dremel_view{});
640 
641  auto element_comp = element_comparator{_check_nulls,
642  _lhs.column(i),
643  _rhs.column(i),
644  null_precedence,
645  depth,
646  _comparator,
647  l_dremel_i,
648  r_dremel_i};
649 
650  weak_ordering state;
651  cuda::std::tie(state, last_null_depth) =
652  cudf::type_dispatcher(_lhs.column(i).type(), element_comp, lhs_index, rhs_index);
653 
654  if (state == weak_ordering::EQUIVALENT) { continue; }
655 
656  return ascending
657  ? state
658  : (state == weak_ordering::GREATER ? weak_ordering::LESS : weak_ordering::GREATER);
659  }
660  return weak_ordering::EQUIVALENT;
661  }
662 
663  private:
664  table_device_view const _lhs;
665  table_device_view const _rhs;
668  Nullate const _check_nulls;
669  cuda::std::optional<device_span<int const>> const _depth;
670  cuda::std::optional<device_span<order const>> const _column_order;
671  cuda::std::optional<device_span<null_order const>> const _null_precedence;
672  PhysicalElementComparator const _comparator;
673 }; // class device_row_comparator
674 
685 template <typename Comparator, weak_ordering... values>
686 struct weak_ordering_comparator_impl {
687  static_assert(not((weak_ordering::EQUIVALENT == values) && ...),
688  "weak_ordering_comparator should not be used for pure equality comparisons. The "
689  "`row_equality_comparator` should be used instead");
690 
691  template <typename LhsType, typename RhsType>
692  __device__ constexpr bool operator()(LhsType const lhs_index,
693  RhsType const rhs_index) const noexcept
694  {
695  weak_ordering const result = comparator(lhs_index, rhs_index);
696  return ((result == values) || ...);
697  }
698  Comparator const comparator;
699 };
700 
707 template <typename Comparator>
708 struct less_comparator : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS> {
714  less_comparator(Comparator const& comparator)
715  : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS>{comparator}
716  {
717  }
718 };
719 
727 template <typename Comparator>
729  : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS, weak_ordering::EQUIVALENT> {
735  less_equivalent_comparator(Comparator const& comparator)
736  : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS, weak_ordering::EQUIVALENT>{
737  comparator}
738  {
739  }
740 };
741 
749  std::invoke_result_t<decltype(table_device_view::create), table_view, rmm::cuda_stream_view>;
750 
773  static std::shared_ptr<preprocessed_table> create(table_view const& table,
774  host_span<order const> column_order,
775  host_span<null_order const> null_precedence,
776  rmm::cuda_stream_view stream);
777 
799  static std::pair<std::shared_ptr<preprocessed_table>, std::shared_ptr<preprocessed_table>> create(
800  table_view const& lhs,
801  table_view const& rhs,
802  host_span<order const> column_order,
803  host_span<null_order const> null_precedence,
804  rmm::cuda_stream_view stream);
805 
806  private:
807  friend class self_comparator;
808  friend class two_table_comparator;
809 
829  static std::shared_ptr<preprocessed_table> create(
830  table_view const& preprocessed_input,
831  std::vector<int>&& verticalized_col_depths,
832  std::vector<std::unique_ptr<column>>&& transformed_columns,
833  host_span<order const> column_order,
834  host_span<null_order const> null_precedence,
835  bool has_ranked_children,
836  rmm::cuda_stream_view stream);
837 
865  rmm::device_uvector<order>&& column_order,
866  rmm::device_uvector<null_order>&& null_precedence,
868  std::vector<detail::dremel_data>&& dremel_data,
870  std::vector<std::unique_ptr<column>>&& transformed_columns,
871  bool has_ranked_children);
872 
874  rmm::device_uvector<order>&& column_order,
875  rmm::device_uvector<null_order>&& null_precedence,
877  std::vector<std::unique_ptr<column>>&& transformed_columns,
878  bool has_ranked_children);
879 
885  operator table_device_view() { return *_t; }
886 
893  [[nodiscard]] cuda::std::optional<device_span<order const>> column_order() const
894  {
895  return _column_order.size() ? cuda::std::optional<device_span<order const>>(_column_order)
896  : cuda::std::nullopt;
897  }
898 
906  [[nodiscard]] cuda::std::optional<device_span<null_order const>> null_precedence() const
907  {
908  return _null_precedence.size()
909  ? cuda::std::optional<device_span<null_order const>>(_null_precedence)
910  : cuda::std::nullopt;
911  }
912 
921  [[nodiscard]] cuda::std::optional<device_span<int const>> depths() const
922  {
923  return _depths.size() ? cuda::std::optional<device_span<int const>>(_depths)
924  : cuda::std::nullopt;
925  }
926 
927  [[nodiscard]] device_span<detail::dremel_device_view const> dremel_device_views() const
928  {
929  if (_dremel_device_views.has_value()) {
930  return device_span<detail::dremel_device_view const>(*_dremel_device_views);
931  } else {
932  return {};
933  }
934  }
935 
936  template <typename PhysicalElementComparator>
937  void check_physical_element_comparator()
938  {
939  if constexpr (!std::is_same_v<PhysicalElementComparator, sorting_physical_element_comparator>) {
940  CUDF_EXPECTS(!_has_ranked_children,
941  "The input table has nested type children and they were transformed using a "
942  "different type of physical element comparator.");
943  }
944  }
945 
946  private:
947  table_device_view_owner const _t;
948  rmm::device_uvector<order> const _column_order;
949  rmm::device_uvector<null_order> const _null_precedence;
950  rmm::device_uvector<size_type> const _depths;
951 
952  // Dremel encoding of list columns used for the comparison algorithm
953  cuda::std::optional<std::vector<detail::dremel_data>> _dremel_data;
954  cuda::std::optional<rmm::device_uvector<detail::dremel_device_view>> _dremel_device_views;
955 
956  // Intermediate columns generated from transforming nested children columns into
957  // integers columns using `cudf::rank()`, need to be kept alive.
958  std::vector<std::unique_ptr<column>> _transformed_columns;
959 
960  // Flag to record if the input table was preprocessed to transform any nested children column(s)
961  // into integer column(s) using `cudf::rank`.
962  bool const _has_ranked_children;
963 };
964 
979  public:
995  host_span<order const> column_order = {},
996  host_span<null_order const> null_precedence = {},
998  : d_t{preprocessed_table::create(t, column_order, null_precedence, stream)}
999  {
1000  }
1001 
1011  self_comparator(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
1012 
1041  template <bool has_nested_columns,
1042  typename Nullate,
1043  typename PhysicalElementComparator = sorting_physical_element_comparator>
1044  auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1045  {
1046  d_t->check_physical_element_comparator<PhysicalElementComparator>();
1047 
1048  return less_comparator{
1049  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1050  nullate,
1051  *d_t,
1052  *d_t,
1053  d_t->dremel_device_views(),
1054  d_t->dremel_device_views(),
1055  d_t->depths(),
1056  d_t->column_order(),
1057  d_t->null_precedence(),
1058  comparator}};
1059  }
1060 
1062  template <bool has_nested_columns,
1063  typename Nullate,
1064  typename PhysicalElementComparator = sorting_physical_element_comparator>
1065  auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1066  {
1067  d_t->check_physical_element_comparator<PhysicalElementComparator>();
1068 
1069  return less_equivalent_comparator{
1070  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1071  nullate,
1072  *d_t,
1073  *d_t,
1074  d_t->dremel_device_views(),
1075  d_t->dremel_device_views(),
1076  d_t->depths(),
1077  d_t->column_order(),
1078  d_t->null_precedence(),
1079  comparator}};
1080  }
1081 
1082  private:
1083  std::shared_ptr<preprocessed_table> d_t;
1084 };
1085 
1086 // @cond
1087 template <typename Comparator>
1088 struct strong_index_comparator_adapter {
1089  strong_index_comparator_adapter(Comparator const& comparator) : comparator{comparator} {}
1090 
1091  __device__ constexpr weak_ordering operator()(lhs_index_type const lhs_index,
1092  rhs_index_type const rhs_index) const noexcept
1093  {
1094  return comparator(static_cast<cudf::size_type>(lhs_index),
1095  static_cast<cudf::size_type>(rhs_index));
1096  }
1097 
1098  __device__ constexpr weak_ordering operator()(rhs_index_type const rhs_index,
1099  lhs_index_type const lhs_index) const noexcept
1100  {
1101  auto const left_right_ordering =
1102  comparator(static_cast<cudf::size_type>(lhs_index), static_cast<cudf::size_type>(rhs_index));
1103 
1104  // Invert less/greater values to reflect right to left ordering
1105  if (left_right_ordering == weak_ordering::LESS) {
1106  return weak_ordering::GREATER;
1107  } else if (left_right_ordering == weak_ordering::GREATER) {
1108  return weak_ordering::LESS;
1109  }
1110  return weak_ordering::EQUIVALENT;
1111  }
1112 
1113  Comparator const comparator;
1114 };
1115 // @endcond
1116 
1132  public:
1152  table_view const& right,
1153  host_span<order const> column_order = {},
1154  host_span<null_order const> null_precedence = {},
1156 
1171  two_table_comparator(std::shared_ptr<preprocessed_table> left,
1172  std::shared_ptr<preprocessed_table> right)
1173  : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1174  {
1175  }
1176 
1213  template <bool has_nested_columns,
1214  typename Nullate,
1215  typename PhysicalElementComparator = sorting_physical_element_comparator>
1216  auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1217  {
1218  d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1219  d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1220 
1221  return less_comparator{strong_index_comparator_adapter{
1222  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1223  nullate,
1224  *d_left_table,
1225  *d_right_table,
1226  d_left_table->dremel_device_views(),
1227  d_right_table->dremel_device_views(),
1228  d_left_table->depths(),
1229  d_left_table->column_order(),
1230  d_left_table->null_precedence(),
1231  comparator}}};
1232  }
1233 
1235  template <bool has_nested_columns,
1236  typename Nullate,
1237  typename PhysicalElementComparator = sorting_physical_element_comparator>
1238  auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1239  {
1240  d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1241  d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1242 
1243  return less_equivalent_comparator{strong_index_comparator_adapter{
1244  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1245  nullate,
1246  *d_left_table,
1247  *d_right_table,
1248  d_left_table->dremel_device_views(),
1249  d_right_table->dremel_device_views(),
1250  d_left_table->depths(),
1251  d_left_table->column_order(),
1252  d_left_table->null_precedence(),
1253  comparator}}};
1254  }
1255 
1256  private:
1257  std::shared_ptr<preprocessed_table> d_left_table;
1258  std::shared_ptr<preprocessed_table> d_right_table;
1259 };
1260 
1261 } // namespace lexicographic
1262 
1263 namespace hash {
1264 class row_hasher;
1265 } // namespace hash
1266 
1267 namespace equality {
1268 
1284  template <typename Element>
1285  __device__ constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
1286  {
1287  return lhs == rhs;
1288  }
1289 };
1290 
1303  template <typename Element, CUDF_ENABLE_IF(not std::is_floating_point_v<Element>)>
1304  __device__ constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
1305  {
1306  return lhs == rhs;
1307  }
1308 
1318  template <typename Element, CUDF_ENABLE_IF(std::is_floating_point_v<Element>)>
1319  __device__ constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
1320  {
1321  return isnan(lhs) and isnan(rhs) ? true : lhs == rhs;
1322  }
1323 };
1324 
1346 template <bool has_nested_columns,
1347  typename Nullate,
1348  typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1350  friend class self_comparator;
1351  friend class two_table_comparator;
1352 
1353  public:
1362  __device__ constexpr bool operator()(size_type const lhs_index,
1363  size_type const rhs_index) const noexcept
1364  {
1365  auto equal_elements = [lhs_index, rhs_index, this](column_device_view l, column_device_view r) {
1366  return cudf::type_dispatcher(
1367  l.type(),
1368  element_comparator{check_nulls, l, r, nulls_are_equal, comparator},
1369  lhs_index,
1370  rhs_index);
1371  };
1372 
1373  return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements);
1374  }
1375 
1376  private:
1387  device_row_comparator(Nullate check_nulls,
1388  table_device_view lhs,
1389  table_device_view rhs,
1390  null_equality nulls_are_equal = null_equality::EQUAL,
1391  PhysicalEqualityComparator comparator = {}) noexcept
1392  : lhs{lhs},
1393  rhs{rhs},
1394  check_nulls{check_nulls},
1395  nulls_are_equal{nulls_are_equal},
1396  comparator{comparator}
1397  {
1398  }
1399 
1403  class element_comparator {
1404  public:
1417  __device__ element_comparator(Nullate check_nulls,
1418  column_device_view lhs,
1419  column_device_view rhs,
1420  null_equality nulls_are_equal = null_equality::EQUAL,
1421  PhysicalEqualityComparator comparator = {}) noexcept
1422  : lhs{lhs},
1423  rhs{rhs},
1424  check_nulls{check_nulls},
1425  nulls_are_equal{nulls_are_equal},
1426  comparator{comparator}
1427  {
1428  }
1429 
1438  template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1439  __device__ bool operator()(size_type const lhs_element_index,
1440  size_type const rhs_element_index) const noexcept
1441  {
1442  if (check_nulls) {
1443  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
1444  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
1445  if (lhs_is_null and rhs_is_null) {
1446  return nulls_are_equal == null_equality::EQUAL;
1447  } else if (lhs_is_null != rhs_is_null) {
1448  return false;
1449  }
1450  }
1451 
1452  return comparator(lhs.element<Element>(lhs_element_index),
1453  rhs.element<Element>(rhs_element_index));
1454  }
1455 
1456  template <typename Element,
1457  CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>() and
1458  (not has_nested_columns or not cudf::is_nested<Element>())),
1459  typename... Args>
1460  __device__ bool operator()(Args...)
1461  {
1462  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
1463  }
1464 
1465  template <typename Element, CUDF_ENABLE_IF(has_nested_columns and cudf::is_nested<Element>())>
1466  __device__ bool operator()(size_type const lhs_element_index,
1467  size_type const rhs_element_index) const noexcept
1468  {
1469  column_device_view lcol = lhs.slice(lhs_element_index, 1);
1470  column_device_view rcol = rhs.slice(rhs_element_index, 1);
1471  while (lcol.type().id() == type_id::STRUCT || lcol.type().id() == type_id::LIST) {
1472  if (check_nulls) {
1473  auto lvalid = detail::make_validity_iterator<true>(lcol);
1474  auto rvalid = detail::make_validity_iterator<true>(rcol);
1475  if (nulls_are_equal == null_equality::UNEQUAL) {
1476  if (thrust::any_of(
1477  thrust::seq, lvalid, lvalid + lcol.size(), cuda::std::logical_not<bool>()) or
1478  thrust::any_of(
1479  thrust::seq, rvalid, rvalid + rcol.size(), cuda::std::logical_not<bool>())) {
1480  return false;
1481  }
1482  } else {
1483  if (not thrust::equal(thrust::seq, lvalid, lvalid + lcol.size(), rvalid)) {
1484  return false;
1485  }
1486  }
1487  }
1488  if (lcol.type().id() == type_id::STRUCT) {
1489  if (lcol.num_child_columns() == 0) { return true; }
1490  // Non-empty structs are assumed to be decomposed and contain only one child
1491  lcol = detail::structs_column_device_view(lcol).get_sliced_child(0);
1492  rcol = detail::structs_column_device_view(rcol).get_sliced_child(0);
1493  } else if (lcol.type().id() == type_id::LIST) {
1494  auto l_list_col = detail::lists_column_device_view(lcol);
1495  auto r_list_col = detail::lists_column_device_view(rcol);
1496 
1497  auto lsizes = make_list_size_iterator(l_list_col);
1498  auto rsizes = make_list_size_iterator(r_list_col);
1499  if (not thrust::equal(thrust::seq, lsizes, lsizes + lcol.size(), rsizes)) {
1500  return false;
1501  }
1502 
1503  lcol = l_list_col.get_sliced_child();
1504  rcol = r_list_col.get_sliced_child();
1505  if (lcol.size() != rcol.size()) { return false; }
1506  }
1507  }
1508 
1509  auto comp = column_comparator{
1510  element_comparator{check_nulls, lcol, rcol, nulls_are_equal, comparator}, lcol.size()};
1511  return type_dispatcher<dispatch_void_if_nested>(lcol.type(), comp);
1512  }
1513 
1514  private:
1522  struct column_comparator {
1523  element_comparator const comp;
1524  size_type const size;
1525 
1531  template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1532  __device__ bool operator()() const noexcept
1533  {
1534  return thrust::all_of(thrust::seq,
1535  thrust::make_counting_iterator(0),
1536  thrust::make_counting_iterator(0) + size,
1537  [this](auto i) { return comp.template operator()<Element>(i, i); });
1538  }
1539 
1540  template <typename Element,
1541  CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>()),
1542  typename... Args>
1543  __device__ bool operator()(Args...) const noexcept
1544  {
1545  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
1546  }
1547  };
1548 
1549  column_device_view const lhs;
1550  column_device_view const rhs;
1551  Nullate const check_nulls;
1552  null_equality const nulls_are_equal;
1553  PhysicalEqualityComparator const comparator;
1554  };
1555 
1556  table_device_view const lhs;
1557  table_device_view const rhs;
1558  Nullate const check_nulls;
1559  null_equality const nulls_are_equal;
1560  PhysicalEqualityComparator const comparator;
1561 };
1562 
1580  static std::shared_ptr<preprocessed_table> create(table_view const& table,
1581  rmm::cuda_stream_view stream);
1582 
1583  private:
1584  friend class self_comparator;
1585  friend class two_table_comparator;
1586  friend class hash::row_hasher;
1588  friend class ::cudf::row::primitive::row_equality_comparator;
1589 
1590  template <template <typename> class Hash>
1591  friend class ::cudf::row::primitive::row_hasher;
1592 
1593  using table_device_view_owner =
1594  std::invoke_result_t<decltype(table_device_view::create), table_view, rmm::cuda_stream_view>;
1595 
1596  preprocessed_table(table_device_view_owner&& table,
1597  std::vector<rmm::device_buffer>&& null_buffers,
1598  std::vector<std::unique_ptr<column>>&& tmp_columns)
1599  : _t(std::move(table)),
1600  _null_buffers(std::move(null_buffers)),
1601  _tmp_columns(std::move(tmp_columns))
1602  {
1603  }
1604 
1610  operator table_device_view() { return *_t; }
1611 
1612  table_device_view_owner _t;
1613  std::vector<rmm::device_buffer> _null_buffers;
1614  std::vector<std::unique_ptr<column>> _tmp_columns;
1615 };
1616 
1622  public:
1632  : d_t(preprocessed_table::create(t, stream))
1633  {
1634  }
1635 
1645  self_comparator(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
1646 
1672  template <bool has_nested_columns,
1673  typename Nullate,
1674  typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1675  auto equal_to(Nullate nullate = {},
1676  null_equality nulls_are_equal = null_equality::EQUAL,
1677  PhysicalEqualityComparator comparator = {}) const noexcept
1678  {
1679  return device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>{
1680  nullate, *d_t, *d_t, nulls_are_equal, comparator};
1681  }
1682 
1683  private:
1684  std::shared_ptr<preprocessed_table> d_t;
1685 };
1686 
1687 // @cond
1688 template <typename Comparator>
1689 struct strong_index_comparator_adapter {
1690  strong_index_comparator_adapter(Comparator const& comparator) : comparator{comparator} {}
1691 
1692  __device__ constexpr bool operator()(lhs_index_type const lhs_index,
1693  rhs_index_type const rhs_index) const noexcept
1694  {
1695  return comparator(static_cast<cudf::size_type>(lhs_index),
1696  static_cast<cudf::size_type>(rhs_index));
1697  }
1698 
1699  __device__ constexpr bool operator()(rhs_index_type const rhs_index,
1700  lhs_index_type const lhs_index) const noexcept
1701  {
1702  return this->operator()(lhs_index, rhs_index);
1703  }
1704 
1705  Comparator const comparator;
1706 };
1707 // @endcond
1708 
1723  public:
1737  table_view const& right,
1738  rmm::cuda_stream_view stream);
1739 
1750  two_table_comparator(std::shared_ptr<preprocessed_table> left,
1751  std::shared_ptr<preprocessed_table> right)
1752  : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1753  {
1754  }
1755 
1786  template <bool has_nested_columns,
1787  typename Nullate,
1788  typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1789  auto equal_to(Nullate nullate = {},
1790  null_equality nulls_are_equal = null_equality::EQUAL,
1791  PhysicalEqualityComparator comparator = {}) const noexcept
1792  {
1793  return strong_index_comparator_adapter{
1794  device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>(
1795  nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)};
1796  }
1797 
1798  private:
1799  std::shared_ptr<preprocessed_table> d_left_table;
1800  std::shared_ptr<preprocessed_table> d_right_table;
1801 };
1802 
1803 } // namespace equality
1804 
1805 namespace hash {
1806 
1813 template <template <typename> class hash_function, typename Nullate>
1815  public:
1823  __device__ element_hasher(
1824  Nullate nulls,
1825  uint32_t seed = DEFAULT_HASH_SEED,
1826  hash_value_type null_hash = cuda::std::numeric_limits<hash_value_type>::max()) noexcept
1827  : _check_nulls(nulls), _seed(seed), _null_hash(null_hash)
1828  {
1829  }
1830 
1839  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
1840  __device__ hash_value_type operator()(column_device_view const& col,
1841  size_type row_index) const noexcept
1842  {
1843  if (_check_nulls && col.is_null(row_index)) { return _null_hash; }
1844  return hash_function<T>{_seed}(col.element<T>(row_index));
1845  }
1846 
1855  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
1856  __device__ hash_value_type operator()(column_device_view const& col,
1857  size_type row_index) const noexcept
1858  {
1859  CUDF_UNREACHABLE("Unsupported type in hash.");
1860  }
1861 
1862  Nullate _check_nulls;
1863  uint32_t _seed;
1865 };
1866 
1873 template <template <typename> class hash_function, typename Nullate>
1875  friend class row_hasher;
1876 
1877  public:
1884  __device__ auto operator()(size_type row_index) const noexcept
1885  {
1886  auto it =
1887  thrust::make_transform_iterator(_table.begin(), [row_index, this](auto const& column) {
1888  return cudf::type_dispatcher<dispatch_storage_type>(
1889  column.type(),
1890  element_hasher_adapter<hash_function>{_check_nulls, _seed},
1891  column,
1892  row_index);
1893  });
1894 
1895  // Hash each element and combine all the hash values together
1896  return detail::accumulate(it, it + _table.num_columns(), _seed, [](auto hash, auto h) {
1897  return cudf::hashing::detail::hash_combine(hash, h);
1898  });
1899  }
1900 
1901  private:
1909  template <template <typename> class hash_fn>
1910  class element_hasher_adapter {
1911  static constexpr hash_value_type NULL_HASH = cuda::std::numeric_limits<hash_value_type>::max();
1912  static constexpr hash_value_type NON_NULL_HASH = 0;
1913 
1914  public:
1915  __device__ element_hasher_adapter(Nullate check_nulls, uint32_t seed) noexcept
1916  : _element_hasher(check_nulls, seed), _check_nulls(check_nulls)
1917  {
1918  }
1919 
1920  template <typename T, CUDF_ENABLE_IF(not cudf::is_nested<T>())>
1921  __device__ hash_value_type operator()(column_device_view const& col,
1922  size_type row_index) const noexcept
1923  {
1924  return _element_hasher.template operator()<T>(col, row_index);
1925  }
1926 
1927  template <typename T, CUDF_ENABLE_IF(cudf::is_nested<T>())>
1928  __device__ hash_value_type operator()(column_device_view const& col,
1929  size_type row_index) const noexcept
1930  {
1931  auto hash = hash_value_type{0};
1932  column_device_view curr_col = col.slice(row_index, 1);
1933  while (curr_col.type().id() == type_id::STRUCT || curr_col.type().id() == type_id::LIST) {
1934  if (_check_nulls) {
1935  auto validity_it = detail::make_validity_iterator<true>(curr_col);
1936  hash = detail::accumulate(
1937  validity_it, validity_it + curr_col.size(), hash, [](auto hash, auto is_valid) {
1938  return cudf::hashing::detail::hash_combine(hash,
1939  is_valid ? NON_NULL_HASH : NULL_HASH);
1940  });
1941  }
1942  if (curr_col.type().id() == type_id::STRUCT) {
1943  if (curr_col.num_child_columns() == 0) { return hash; }
1944  // Non-empty structs are assumed to be decomposed and contain only one child
1945  curr_col = detail::structs_column_device_view(curr_col).get_sliced_child(0);
1946  } else if (curr_col.type().id() == type_id::LIST) {
1947  auto list_col = detail::lists_column_device_view(curr_col);
1948  auto list_sizes = make_list_size_iterator(list_col);
1949  hash = detail::accumulate(
1950  list_sizes, list_sizes + list_col.size(), hash, [](auto hash, auto size) {
1951  return cudf::hashing::detail::hash_combine(hash, hash_fn<size_type>{}(size));
1952  });
1953  curr_col = list_col.get_sliced_child();
1954  }
1955  }
1956  for (int i = 0; i < curr_col.size(); ++i) {
1957  hash = cudf::hashing::detail::hash_combine(
1958  hash,
1959  type_dispatcher<dispatch_void_if_nested>(curr_col.type(), _element_hasher, curr_col, i));
1960  }
1961  return hash;
1962  }
1963 
1964  element_hasher<hash_fn, Nullate> const _element_hasher;
1965  Nullate const _check_nulls;
1966  };
1967 
1968  CUDF_HOST_DEVICE device_row_hasher(Nullate check_nulls,
1969  table_device_view t,
1970  uint32_t seed = DEFAULT_HASH_SEED) noexcept
1971  : _check_nulls{check_nulls}, _table{t}, _seed(seed)
1972  {
1973  }
1974 
1975  Nullate const _check_nulls;
1976  table_device_view const _table;
1977  uint32_t const _seed;
1978 };
1979 
1980 // Inject row::equality::preprocessed_table into the row::hash namespace
1981 // As a result, row::equality::preprocessed_table and row::hash::preprocessed table are the same
1982 // type and are interchangeable.
1983 using preprocessed_table = row::equality::preprocessed_table;
1984 
1989 class row_hasher {
1990  public:
1999  : d_t(preprocessed_table::create(t, stream))
2000  {
2001  }
2002 
2012  row_hasher(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
2013 
2026  template <
2027  template <typename> class hash_function = cudf::hashing::detail::default_hash,
2028  template <template <typename> class, typename> class DeviceRowHasher = device_row_hasher,
2029  typename Nullate>
2030  DeviceRowHasher<hash_function, Nullate> device_hasher(Nullate nullate = {},
2031  uint32_t seed = DEFAULT_HASH_SEED) const
2032  {
2033  return DeviceRowHasher<hash_function, Nullate>(nullate, *d_t, seed);
2034  }
2035 
2036  private:
2037  std::shared_ptr<preprocessed_table> d_t;
2038 };
2039 
2040 } // namespace hash
2041 
2042 } // namespace row
2043 
2044 } // namespace experimental
2045 } // namespace CUDF_EXPORT cudf
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
T element(size_type element_index) const noexcept
Returns a copy of the element at the specified index.
CUDF_HOST_DEVICE column_device_view slice(size_type offset, size_type size) const noexcept
Get a new column_device_view which is a slice of this column.
CUDF_HOST_DEVICE size_type num_child_columns() const noexcept
Returns the number of child columns.
const_iterator< T > begin() const
Return an iterator to the first element of the column.
const_iterator< T > end() const
Returns an iterator to the element following the last element of the column.
A container of nullable device data as a column of elements.
Definition: column.hpp:47
constexpr CUDF_HOST_DEVICE type_id id() const noexcept
Returns the type identifier.
Definition: types.hpp:287
CUDF_HOST_DEVICE data_type type() const noexcept
Returns the element type.
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
CUDF_HOST_DEVICE bool nullable() const noexcept
Indicates whether the column can contain null elements, i.e., if it has an allocated bitmask.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child() const
Fetches the child column of the underlying list column with offset and size applied.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child(size_type idx) const
Fetches the child column of the underlying struct column.
constexpr bool operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table is equal to the row at rhs_index in the rhs tabl...
Comparator for performing equality comparisons between two rows of the same table.
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing equality comparisons between two rows of the same table.
self_comparator(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows of the same table.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Get the comparison operator to use on the device.
An owning object that can be used to equality compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing equality comparisons between two rows from two tables.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows from two tables.
Computes the hash value of a row in the given table.
auto operator()(size_type row_index) const noexcept
Return the hash value of a row in the given table.
Computes the hash value of an element in the given column.
element_hasher(Nullate nulls, uint32_t seed=DEFAULT_HASH_SEED, hash_value_type null_hash=cuda::std::numeric_limits< hash_value_type >::max()) noexcept
Constructs an element_hasher object.
hash_value_type _null_hash
Hash value to use for null elements.
Computes the hash value of a row in the given table.
DeviceRowHasher< hash_function, Nullate > device_hasher(Nullate nullate={}, uint32_t seed=DEFAULT_HASH_SEED) const
Get the hash operator to use on the device.
row_hasher(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for hashing the rows of a table.
row_hasher(std::shared_ptr< preprocessed_table > t)
Construct an owning object for hashing the rows of a table from an existing preprocessed_table.
Performs a relational comparison between two elements in two columns.
cuda::std::pair< weak_ordering, int > operator()(size_type lhs_element_index, size_type rhs_element_index)
Compares two list-type columns.
cuda::std::pair< weak_ordering, int > operator()(size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
Performs a relational comparison between the specified elements.
cuda::std::pair< weak_ordering, int > operator()(size_type const, size_type const) const noexcept
Throws run-time error when columns types cannot be compared or if this class is instantiated with has...
element_comparator(Nullate check_nulls, column_device_view lhs, column_device_view rhs, null_order null_precedence=null_order::BEFORE, int depth=0, PhysicalElementComparator comparator={}, optional_dremel_view l_dremel_device_view={}, optional_dremel_view r_dremel_device_view={})
Construct type-dispatched function object for performing a relational comparison between two elements...
Computes the lexicographic comparison between 2 rows.
constexpr weak_ordering operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less,...
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, cuda::std::optional< device_span< order const >> column_order=cuda::std::nullopt, cuda::std::optional< device_span< null_order const >> null_precedence=cuda::std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables....
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, device_span< detail::dremel_device_view const > l_dremel_device_views, device_span< detail::dremel_device_view const > r_dremel_device_views, cuda::std::optional< device_span< int const >> depth=cuda::std::nullopt, cuda::std::optional< device_span< order const >> column_order=cuda::std::nullopt, cuda::std::optional< device_span< null_order const >> null_precedence=cuda::std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables.
An owning object that can be used to lexicographically compare two rows of the same table.
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
self_comparator(table_view const &t, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between two rows of the same tab...
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
An owning object that can be used to lexicographically compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between rows of two different ta...
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
Table device view that is usable in device memory.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
Column device view class definitions.
uint32_t hash_value_type
Type of hash value.
Definition: hashing.hpp:29
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
std::unique_ptr< cudf::column > is_valid(cudf::column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a column of type_id::BOOL8 elements where for every element in input true indicates the value...
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:154
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:159
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:151
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
#define CUDF_ENABLE_IF(...)
Convenience macro for SFINAE as an unnamed template parameter.
Definition: traits.hpp:50
cuDF interfaces
Definition: host_udf.hpp:37
weak_ordering
Result type of the element_relational_comparator function object.
@ EQUIVALENT
Indicates a is ordered neither before nor after b
auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Compare the nulls according to null order.
CUDF_HOST_DEVICE auto make_list_size_iterator(detail::lists_column_device_view const &c)
Makes an iterator that returns size of the list by row index.
Column APIs for sort and rank.
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:355
A map from cudf::type_id to cudf type that excludes LIST and STRUCT types.
std::conditional_t< t==type_id::STRUCT or t==type_id::LIST, void, id_to_type< t > > type
The type to dispatch to if the type is nested.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparison of non-floating point values.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparisons.
Preprocessed table for use with row equality comparison or row hashing.
static std::shared_ptr< preprocessed_table > create(table_view const &table, rmm::cuda_stream_view stream)
Factory to construct preprocessed_table for use with row equality comparison or row hashing.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS m...
less_comparator(Comparator const &comparator)
Constructs a less_comparator.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS o...
less_equivalent_comparator(Comparator const &comparator)
Constructs a less_equivalent_comparator.
Computes a weak ordering of two values with special sorting behavior.
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparisons.
Preprocessed table for use with lexicographical comparison.
std::invoke_result_t< decltype(table_device_view::create), table_view, rmm::cuda_stream_view > table_device_view_owner
Type of table device view owner for the preprocessed table.
static std::pair< std::shared_ptr< preprocessed_table >, std::shared_ptr< preprocessed_table > > create(table_view const &lhs, table_view const &rhs, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess tables for use with lexicographical comparison.
static std::shared_ptr< preprocessed_table > create(table_view const &table, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess table for use with lexicographical comparison.
Relational comparator functor that compares physical values rather than logical elements like lists,...
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparison of non-floating point values.
A counting iterator that uses strongly typed indices bound to tables.
thrust::iterator_adaptor< strong_index_iterator< Index >, Index > super_t
The base class.
constexpr strong_index_iterator(Underlying n)
Constructs a strong index iterator.
C++20 std::span with reduced feature set.
Definition: span.hpp:194
Indicates the presence of nulls at compile-time or runtime.
Table device view class definitions.
bool has_nested_columns(table_view const &table)
Determine if any nested columns exist in a given table.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:32