experimental/row_operators.cuh
1 /*
2  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
20 #include <cudf/detail/iterator.cuh>
21 #include <cudf/detail/utilities/algorithm.cuh>
22 #include <cudf/detail/utilities/assert.cuh>
23 #include <cudf/hashing/detail/default_hash.cuh>
24 #include <cudf/hashing/detail/hashing.hpp>
25 #include <cudf/lists/detail/dremel.hpp>
26 #include <cudf/lists/list_device_view.cuh>
27 #include <cudf/lists/lists_column_device_view.cuh>
28 #include <cudf/sorting.hpp>
29 #include <cudf/structs/structs_column_device_view.cuh>
30 #include <cudf/table/row_operators.cuh>
35 
36 #include <cuda/std/limits>
37 #include <cuda/std/optional>
38 #include <cuda/std/tuple>
39 #include <cuda/std/utility>
40 #include <thrust/detail/use_default.h>
41 #include <thrust/equal.h>
42 #include <thrust/execution_policy.h>
43 #include <thrust/functional.h>
44 #include <thrust/iterator/counting_iterator.h>
45 #include <thrust/iterator/iterator_adaptor.h>
46 #include <thrust/iterator/iterator_categories.h>
47 #include <thrust/iterator/iterator_facade.h>
48 #include <thrust/iterator/transform_iterator.h>
49 #include <thrust/logical.h>
50 #include <thrust/swap.h>
51 #include <thrust/transform_reduce.h>
52 
53 #include <memory>
54 #include <type_traits>
55 
56 namespace CUDF_EXPORT cudf {
57 
58 namespace experimental {
59 
75 template <cudf::type_id t>
78  using type = std::conditional_t<t == type_id::STRUCT or t == type_id::LIST, void, id_to_type<t>>;
79 };
80 
81 namespace row {
82 
83 enum class lhs_index_type : size_type {};
84 enum class rhs_index_type : size_type {};
85 
99 template <typename Index, typename Underlying = std::underlying_type_t<Index>>
100 struct strong_index_iterator : public thrust::iterator_facade<strong_index_iterator<Index>,
101  Index,
102  thrust::use_default,
103  thrust::random_access_traversal_tag,
104  Index,
105  Underlying> {
106  using super_t =
107  thrust::iterator_adaptor<strong_index_iterator<Index>, Index>;
108 
114  explicit constexpr strong_index_iterator(Underlying n) : begin{n} {}
115 
116  friend class thrust::iterator_core_access;
117 
118  private:
119  __device__ constexpr void increment() { ++begin; }
120  __device__ constexpr void decrement() { --begin; }
121 
122  __device__ constexpr void advance(Underlying n) { begin += n; }
123 
124  __device__ constexpr bool equal(strong_index_iterator<Index> const& other) const noexcept
125  {
126  return begin == other.begin;
127  }
128 
129  __device__ constexpr Index dereference() const noexcept { return static_cast<Index>(begin); }
130 
131  __device__ constexpr Underlying distance_to(
132  strong_index_iterator<Index> const& other) const noexcept
133  {
134  return other.begin - begin;
135  }
136 
137  Underlying begin{};
138 };
139 
143 using lhs_iterator = strong_index_iterator<lhs_index_type>;
144 
148 using rhs_iterator = strong_index_iterator<rhs_index_type>;
149 
150 namespace lexicographic {
151 
167  template <typename Element>
168  __device__ constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
169  {
170  return detail::compare_elements(lhs, rhs);
171  }
172 };
173 
187  template <typename Element, CUDF_ENABLE_IF(not std::is_floating_point_v<Element>)>
188  __device__ constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
189  {
190  return detail::compare_elements(lhs, rhs);
191  }
192 
200  template <typename Element, CUDF_ENABLE_IF(std::is_floating_point_v<Element>)>
201  __device__ constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
202  {
203  if (isnan(lhs)) {
204  return isnan(rhs) ? weak_ordering::EQUIVALENT : weak_ordering::GREATER;
205  } else if (isnan(rhs)) {
206  return weak_ordering::LESS;
207  }
208 
209  return detail::compare_elements(lhs, rhs);
210  }
211 };
212 
213 using optional_dremel_view = cuda::std::optional<detail::dremel_device_view const>;
214 
215 // The has_nested_columns template parameter of the device_row_comparator is
216 // necessary to help the compiler optimize our code. Without it, the list and
217 // struct view specializations are present in the code paths used for primitive
218 // types, and the compiler fails to inline this nearly as well resulting in a
219 // significant performance drop. As a result, there is some minor tension in
220 // the current design between the presence of this parameter and the way that
221 // the Dremel data is passed around, first as a
222 // std::optional<device_span<dremel_device_view>> in the
223 // preprocessed_table/device_row_comparator (which is always valid when
224 // has_nested_columns and is otherwise invalid) that is then unpacked to a
225 // cuda::std::optional<dremel_device_view> at the element_comparator level (which
226 // is always valid for a list column and otherwise invalid). We cannot use an
227 // additional template parameter for the element_comparator on a per-column
228 // basis because we cannot conditionally define dremel_device_view member
229 // variables without jumping through extra hoops with inheritance, so the
230 // cuda::std::optional<dremel_device_view> member must be an optional rather than
231 // a raw dremel_device_view.
262 template <bool has_nested_columns,
263  typename Nullate,
264  typename PhysicalElementComparator = sorting_physical_element_comparator>
266  public:
267  friend class self_comparator;
268  friend class two_table_comparator;
269 
290  Nullate check_nulls,
291  table_device_view lhs,
292  table_device_view rhs,
293  device_span<detail::dremel_device_view const> l_dremel_device_views,
294  device_span<detail::dremel_device_view const> r_dremel_device_views,
295  cuda::std::optional<device_span<int const>> depth = cuda::std::nullopt,
296  cuda::std::optional<device_span<order const>> column_order = cuda::std::nullopt,
297  cuda::std::optional<device_span<null_order const>> null_precedence = cuda::std::nullopt,
298  PhysicalElementComparator comparator = {}) noexcept
299  : _lhs{lhs},
300  _rhs{rhs},
301  _l_dremel(l_dremel_device_views),
302  _r_dremel(r_dremel_device_views),
303  _check_nulls{check_nulls},
304  _depth{depth},
305  _column_order{column_order},
306  _null_precedence{null_precedence},
307  _comparator{comparator}
308  {
309  }
310 
329  template <bool nested_disable = not has_nested_columns, CUDF_ENABLE_IF(nested_disable)>
331  Nullate check_nulls,
332  table_device_view lhs,
333  table_device_view rhs,
334  cuda::std::optional<device_span<order const>> column_order = cuda::std::nullopt,
335  cuda::std::optional<device_span<null_order const>> null_precedence = cuda::std::nullopt,
336  PhysicalElementComparator comparator = {}) noexcept
337  : _lhs{lhs},
338  _rhs{rhs},
339  _l_dremel{},
340  _r_dremel{},
341  _check_nulls{check_nulls},
342  _depth{},
343  _column_order{column_order},
344  _null_precedence{null_precedence},
345  _comparator{comparator}
346  {
347  }
348 
353  public:
370  __device__ element_comparator(Nullate check_nulls,
371  column_device_view lhs,
372  column_device_view rhs,
373  null_order null_precedence = null_order::BEFORE,
374  int depth = 0,
375  PhysicalElementComparator comparator = {},
376  optional_dremel_view l_dremel_device_view = {},
377  optional_dremel_view r_dremel_device_view = {})
378  : _lhs{lhs},
379  _rhs{rhs},
380  _check_nulls{check_nulls},
381  _null_precedence{null_precedence},
382  _depth{depth},
383  _l_dremel_device_view{l_dremel_device_view},
384  _r_dremel_device_view{r_dremel_device_view},
385  _comparator{comparator}
386  {
387  }
388 
397  template <typename Element,
398  CUDF_ENABLE_IF(cudf::is_relationally_comparable<Element, Element>())>
399  __device__ cuda::std::pair<weak_ordering, int> operator()(
400  size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
401  {
402  if (_check_nulls) {
403  bool const lhs_is_null{_lhs.is_null(lhs_element_index)};
404  bool const rhs_is_null{_rhs.is_null(rhs_element_index)};
405 
406  if (lhs_is_null or rhs_is_null) { // at least one is null
407  return cuda::std::pair(null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth);
408  }
409  }
410 
411  return cuda::std::pair(_comparator(_lhs.element<Element>(lhs_element_index),
412  _rhs.element<Element>(rhs_element_index)),
413  cuda::std::numeric_limits<int>::max());
414  }
415 
423  template <typename Element,
424  CUDF_ENABLE_IF(not cudf::is_relationally_comparable<Element, Element>() and
425  (not has_nested_columns or not cudf::is_nested<Element>()))>
426  __device__ cuda::std::pair<weak_ordering, int> operator()(size_type const,
427  size_type const) const noexcept
428  {
429  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
430  }
431 
440  template <typename Element,
441  CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::struct_view>)>
442  __device__ cuda::std::pair<weak_ordering, int> operator()(
443  size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
444  {
445  column_device_view lcol = _lhs;
446  column_device_view rcol = _rhs;
447  int depth = _depth;
448  while (lcol.type().id() == type_id::STRUCT) {
449  bool const lhs_is_null{lcol.is_null(lhs_element_index)};
450  bool const rhs_is_null{rcol.is_null(rhs_element_index)};
451 
452  if (lhs_is_null or rhs_is_null) { // at least one is null
453  weak_ordering state = null_compare(lhs_is_null, rhs_is_null, _null_precedence);
454  return cuda::std::pair(state, depth);
455  }
456 
457  if (lcol.num_child_columns() == 0) {
458  return cuda::std::pair(weak_ordering::EQUIVALENT, cuda::std::numeric_limits<int>::max());
459  }
460 
461  // Non-empty structs have been modified to only have 1 child when using this.
464  ++depth;
465  }
466 
467  return cudf::type_dispatcher<dispatch_void_if_nested>(
468  lcol.type(),
469  element_comparator{_check_nulls, lcol, rcol, _null_precedence, depth, _comparator},
470  lhs_element_index,
471  rhs_element_index);
472  }
473 
482  template <typename Element,
483  CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::list_view>)>
484  __device__ cuda::std::pair<weak_ordering, int> operator()(size_type lhs_element_index,
485  size_type rhs_element_index)
486  {
487  // only order top-NULLs according to null_order
488  auto const is_l_row_null = _lhs.is_null(lhs_element_index);
489  auto const is_r_row_null = _rhs.is_null(rhs_element_index);
490  if (is_l_row_null || is_r_row_null) {
491  return cuda::std::pair(null_compare(is_l_row_null, is_r_row_null, _null_precedence),
492  _depth);
493  }
494 
495  // These are all the values from the Dremel encoding.
496  auto const l_max_def_level = _l_dremel_device_view->max_def_level;
497  auto const r_max_def_level = _r_dremel_device_view->max_def_level;
498  auto const l_def_levels = _l_dremel_device_view->def_levels;
499  auto const r_def_levels = _r_dremel_device_view->def_levels;
500  auto const l_rep_levels = _l_dremel_device_view->rep_levels;
501  auto const r_rep_levels = _r_dremel_device_view->rep_levels;
502 
503  // Traverse the nested list hierarchy to get a column device view
504  // pointing to the underlying child data.
505  column_device_view lcol = _lhs.slice(lhs_element_index, 1);
506  column_device_view rcol = _rhs.slice(rhs_element_index, 1);
507 
508  while (lcol.type().id() == type_id::LIST) {
511  }
512 
513  // These start and end values indicate the start and end points of all
514  // the elements of the lists in the current list element
515  // (`[lhs|rhs]_element_index`) that we are comparing.
516  auto const l_offsets = _l_dremel_device_view->offsets;
517  auto const r_offsets = _r_dremel_device_view->offsets;
518  auto l_start = l_offsets[lhs_element_index];
519  auto l_end = l_offsets[lhs_element_index + 1];
520  auto r_start = r_offsets[rhs_element_index];
521  auto r_end = r_offsets[rhs_element_index + 1];
522 
523  // This comparator will be used to compare leaf (non-nested) data types.
524  auto comparator =
525  element_comparator{_check_nulls, lcol, rcol, _null_precedence, _depth, _comparator};
526 
527  // Loop over each element in the encoding. Note that this includes nulls
528  // and empty lists, so not every index corresponds to an actual element
529  // in the child column. The element_index is used to keep track of the current
530  // child element that we're actually comparing.
531  for (int l_dremel_index = l_start, r_dremel_index = r_start, element_index = 0;
532  l_dremel_index < l_end and r_dremel_index < r_end;
533  ++l_dremel_index, ++r_dremel_index) {
534  auto const l_rep_level = l_rep_levels[l_dremel_index];
535  auto const r_rep_level = r_rep_levels[r_dremel_index];
536 
537  // early exit for smaller sub-list
538  if (l_rep_level != r_rep_level) {
539  // the lower repetition level is a smaller sub-list
540  return l_rep_level < r_rep_level ? cuda::std::pair(weak_ordering::LESS, _depth)
541  : cuda::std::pair(weak_ordering::GREATER, _depth);
542  }
543 
544  // only compare if left and right are at same nesting level
545  auto const l_def_level = l_def_levels[l_dremel_index];
546  auto const r_def_level = r_def_levels[r_dremel_index];
547 
548  // either left or right are empty or NULLs of arbitrary nesting
549  if (l_def_level < l_max_def_level || r_def_level < r_max_def_level) {
550  // in the fully unraveled version of the list column, only the
551  // most nested NULLs and leafs are present
552  // In this rare condition that we get to the most nested NULL, we increment
553  // element_index because either both rows have a deeply nested NULL at the
554  // same position, and we'll "continue" in our iteration, or we will early
555  // exit if only one of the rows has a deeply nested NULL
556  if ((lcol.nullable() and l_def_levels[l_dremel_index] == l_max_def_level - 1) or
557  (rcol.nullable() and r_def_levels[r_dremel_index] == r_max_def_level - 1)) {
558  ++element_index;
559  }
560  if (l_def_level == r_def_level) { continue; }
561  // We require [] < [NULL] < [leaf] for nested nulls.
562  // The null_precedence only affects top level nulls.
563  return l_def_level < r_def_level ? cuda::std::pair(weak_ordering::LESS, _depth)
564  : cuda::std::pair(weak_ordering::GREATER, _depth);
565  }
566 
567  // finally, compare leaf to leaf
568  weak_ordering state{weak_ordering::EQUIVALENT};
569  int last_null_depth = _depth;
570  cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher<dispatch_void_if_nested>(
571  lcol.type(), comparator, element_index, element_index);
572  if (state != weak_ordering::EQUIVALENT) { return cuda::std::pair(state, _depth); }
573  ++element_index;
574  }
575 
576  // If we have reached this stage, we know that definition levels,
577  // repetition levels, and actual elements are identical in both list
578  // columns up to the `min(l_end - l_start, r_end - r_start)` element of
579  // the Dremel encoding. However, two lists can only compare equivalent if
580  // they are of the same length. Otherwise, the shorter of the two is less
581  // than the longer. This final check determines the appropriate resulting
582  // ordering by checking how many total elements each list is composed of.
583  return cuda::std::pair(detail::compare_elements(l_end - l_start, r_end - r_start), _depth);
584  }
585 
586  private:
587  column_device_view const _lhs;
588  column_device_view const _rhs;
589  Nullate const _check_nulls;
590  null_order const _null_precedence;
591  int const _depth;
592  optional_dremel_view _l_dremel_device_view;
593  optional_dremel_view _r_dremel_device_view;
594  PhysicalElementComparator const _comparator;
595  };
596 
597  public:
607  __device__ constexpr weak_ordering operator()(size_type const lhs_index,
608  size_type const rhs_index) const noexcept
609  {
610  int last_null_depth = cuda::std::numeric_limits<int>::max();
611  size_type list_column_index{-1};
612  for (size_type i = 0; i < _lhs.num_columns(); ++i) {
613  if (_lhs.column(i).type().id() == type_id::LIST) { ++list_column_index; }
614 
615  int const depth = _depth.has_value() ? (*_depth)[i] : 0;
616  if (depth > last_null_depth) { continue; }
617 
618  bool const ascending =
619  _column_order.has_value() ? (*_column_order)[i] == order::ASCENDING : true;
620 
621  null_order const null_precedence =
622  _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE;
623 
624  // TODO: At what point do we verify that the columns of lhs and rhs are
625  // all of the same types? I assume that it's already happened before
626  // here, otherwise the current code would be failing.
627  auto const [l_dremel_i, r_dremel_i] =
628  _lhs.column(i).type().id() == type_id::LIST
629  ? cuda::std::make_tuple(optional_dremel_view(_l_dremel[list_column_index]),
630  optional_dremel_view(_r_dremel[list_column_index]))
631  : cuda::std::make_tuple(optional_dremel_view{}, optional_dremel_view{});
632 
633  auto element_comp = element_comparator{_check_nulls,
634  _lhs.column(i),
635  _rhs.column(i),
636  null_precedence,
637  depth,
638  _comparator,
639  l_dremel_i,
640  r_dremel_i};
641 
642  weak_ordering state;
643  cuda::std::tie(state, last_null_depth) =
644  cudf::type_dispatcher(_lhs.column(i).type(), element_comp, lhs_index, rhs_index);
645 
646  if (state == weak_ordering::EQUIVALENT) { continue; }
647 
648  return ascending
649  ? state
650  : (state == weak_ordering::GREATER ? weak_ordering::LESS : weak_ordering::GREATER);
651  }
652  return weak_ordering::EQUIVALENT;
653  }
654 
655  private:
656  table_device_view const _lhs;
657  table_device_view const _rhs;
660  Nullate const _check_nulls;
661  cuda::std::optional<device_span<int const>> const _depth;
662  cuda::std::optional<device_span<order const>> const _column_order;
663  cuda::std::optional<device_span<null_order const>> const _null_precedence;
664  PhysicalElementComparator const _comparator;
665 }; // class device_row_comparator
666 
677 template <typename Comparator, weak_ordering... values>
678 struct weak_ordering_comparator_impl {
679  static_assert(not((weak_ordering::EQUIVALENT == values) && ...),
680  "weak_ordering_comparator should not be used for pure equality comparisons. The "
681  "`row_equality_comparator` should be used instead");
682 
683  template <typename LhsType, typename RhsType>
684  __device__ constexpr bool operator()(LhsType const lhs_index,
685  RhsType const rhs_index) const noexcept
686  {
687  weak_ordering const result = comparator(lhs_index, rhs_index);
688  return ((result == values) || ...);
689  }
690  Comparator const comparator;
691 };
692 
699 template <typename Comparator>
700 struct less_comparator : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS> {
706  less_comparator(Comparator const& comparator)
707  : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS>{comparator}
708  {
709  }
710 };
711 
719 template <typename Comparator>
721  : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS, weak_ordering::EQUIVALENT> {
727  less_equivalent_comparator(Comparator const& comparator)
728  : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS, weak_ordering::EQUIVALENT>{
729  comparator}
730  {
731  }
732 };
733 
741  std::invoke_result_t<decltype(table_device_view::create), table_view, rmm::cuda_stream_view>;
742 
765  static std::shared_ptr<preprocessed_table> create(table_view const& table,
766  host_span<order const> column_order,
767  host_span<null_order const> null_precedence,
768  rmm::cuda_stream_view stream);
769 
791  static std::pair<std::shared_ptr<preprocessed_table>, std::shared_ptr<preprocessed_table>> create(
792  table_view const& lhs,
793  table_view const& rhs,
794  host_span<order const> column_order,
795  host_span<null_order const> null_precedence,
796  rmm::cuda_stream_view stream);
797 
798  private:
799  friend class self_comparator;
800  friend class two_table_comparator;
801 
821  static std::shared_ptr<preprocessed_table> create(
822  table_view const& preprocessed_input,
823  std::vector<int>&& verticalized_col_depths,
824  std::vector<std::unique_ptr<column>>&& transformed_columns,
825  host_span<order const> column_order,
826  host_span<null_order const> null_precedence,
827  bool has_ranked_children,
828  rmm::cuda_stream_view stream);
829 
857  rmm::device_uvector<order>&& column_order,
858  rmm::device_uvector<null_order>&& null_precedence,
860  std::vector<detail::dremel_data>&& dremel_data,
862  std::vector<std::unique_ptr<column>>&& transformed_columns,
863  bool has_ranked_children);
864 
866  rmm::device_uvector<order>&& column_order,
867  rmm::device_uvector<null_order>&& null_precedence,
869  std::vector<std::unique_ptr<column>>&& transformed_columns,
870  bool has_ranked_children);
871 
877  operator table_device_view() { return *_t; }
878 
885  [[nodiscard]] cuda::std::optional<device_span<order const>> column_order() const
886  {
887  return _column_order.size() ? cuda::std::optional<device_span<order const>>(_column_order)
888  : cuda::std::nullopt;
889  }
890 
898  [[nodiscard]] cuda::std::optional<device_span<null_order const>> null_precedence() const
899  {
900  return _null_precedence.size()
901  ? cuda::std::optional<device_span<null_order const>>(_null_precedence)
902  : cuda::std::nullopt;
903  }
904 
913  [[nodiscard]] cuda::std::optional<device_span<int const>> depths() const
914  {
915  return _depths.size() ? cuda::std::optional<device_span<int const>>(_depths)
916  : cuda::std::nullopt;
917  }
918 
919  [[nodiscard]] device_span<detail::dremel_device_view const> dremel_device_views() const
920  {
921  if (_dremel_device_views.has_value()) {
922  return device_span<detail::dremel_device_view const>(*_dremel_device_views);
923  } else {
924  return {};
925  }
926  }
927 
928  template <typename PhysicalElementComparator>
929  void check_physical_element_comparator()
930  {
931  if constexpr (!std::is_same_v<PhysicalElementComparator, sorting_physical_element_comparator>) {
932  CUDF_EXPECTS(!_has_ranked_children,
933  "The input table has nested type children and they were transformed using a "
934  "different type of physical element comparator.");
935  }
936  }
937 
938  private:
939  table_device_view_owner const _t;
940  rmm::device_uvector<order> const _column_order;
941  rmm::device_uvector<null_order> const _null_precedence;
942  rmm::device_uvector<size_type> const _depths;
943 
944  // Dremel encoding of list columns used for the comparison algorithm
945  cuda::std::optional<std::vector<detail::dremel_data>> _dremel_data;
946  cuda::std::optional<rmm::device_uvector<detail::dremel_device_view>> _dremel_device_views;
947 
948  // Intermediate columns generated from transforming nested children columns into
949  // integers columns using `cudf::rank()`, need to be kept alive.
950  std::vector<std::unique_ptr<column>> _transformed_columns;
951 
952  // Flag to record if the input table was preprocessed to transform any nested children column(s)
953  // into integer column(s) using `cudf::rank`.
954  bool const _has_ranked_children;
955 };
956 
971  public:
987  host_span<order const> column_order = {},
988  host_span<null_order const> null_precedence = {},
990  : d_t{preprocessed_table::create(t, column_order, null_precedence, stream)}
991  {
992  }
993 
1003  self_comparator(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
1004 
1033  template <bool has_nested_columns,
1034  typename Nullate,
1035  typename PhysicalElementComparator = sorting_physical_element_comparator>
1036  auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1037  {
1038  d_t->check_physical_element_comparator<PhysicalElementComparator>();
1039 
1040  return less_comparator{
1041  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1042  nullate,
1043  *d_t,
1044  *d_t,
1045  d_t->dremel_device_views(),
1046  d_t->dremel_device_views(),
1047  d_t->depths(),
1048  d_t->column_order(),
1049  d_t->null_precedence(),
1050  comparator}};
1051  }
1052 
1054  template <bool has_nested_columns,
1055  typename Nullate,
1056  typename PhysicalElementComparator = sorting_physical_element_comparator>
1057  auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1058  {
1059  d_t->check_physical_element_comparator<PhysicalElementComparator>();
1060 
1061  return less_equivalent_comparator{
1062  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1063  nullate,
1064  *d_t,
1065  *d_t,
1066  d_t->dremel_device_views(),
1067  d_t->dremel_device_views(),
1068  d_t->depths(),
1069  d_t->column_order(),
1070  d_t->null_precedence(),
1071  comparator}};
1072  }
1073 
1074  private:
1075  std::shared_ptr<preprocessed_table> d_t;
1076 };
1077 
1078 // @cond
1079 template <typename Comparator>
1080 struct strong_index_comparator_adapter {
1081  strong_index_comparator_adapter(Comparator const& comparator) : comparator{comparator} {}
1082 
1083  __device__ constexpr weak_ordering operator()(lhs_index_type const lhs_index,
1084  rhs_index_type const rhs_index) const noexcept
1085  {
1086  return comparator(static_cast<cudf::size_type>(lhs_index),
1087  static_cast<cudf::size_type>(rhs_index));
1088  }
1089 
1090  __device__ constexpr weak_ordering operator()(rhs_index_type const rhs_index,
1091  lhs_index_type const lhs_index) const noexcept
1092  {
1093  auto const left_right_ordering =
1094  comparator(static_cast<cudf::size_type>(lhs_index), static_cast<cudf::size_type>(rhs_index));
1095 
1096  // Invert less/greater values to reflect right to left ordering
1097  if (left_right_ordering == weak_ordering::LESS) {
1098  return weak_ordering::GREATER;
1099  } else if (left_right_ordering == weak_ordering::GREATER) {
1100  return weak_ordering::LESS;
1101  }
1102  return weak_ordering::EQUIVALENT;
1103  }
1104 
1105  Comparator const comparator;
1106 };
1107 // @endcond
1108 
1124  public:
1144  table_view const& right,
1145  host_span<order const> column_order = {},
1146  host_span<null_order const> null_precedence = {},
1148 
1163  two_table_comparator(std::shared_ptr<preprocessed_table> left,
1164  std::shared_ptr<preprocessed_table> right)
1165  : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1166  {
1167  }
1168 
1205  template <bool has_nested_columns,
1206  typename Nullate,
1207  typename PhysicalElementComparator = sorting_physical_element_comparator>
1208  auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1209  {
1210  d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1211  d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1212 
1213  return less_comparator{strong_index_comparator_adapter{
1214  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1215  nullate,
1216  *d_left_table,
1217  *d_right_table,
1218  d_left_table->dremel_device_views(),
1219  d_right_table->dremel_device_views(),
1220  d_left_table->depths(),
1221  d_left_table->column_order(),
1222  d_left_table->null_precedence(),
1223  comparator}}};
1224  }
1225 
1227  template <bool has_nested_columns,
1228  typename Nullate,
1229  typename PhysicalElementComparator = sorting_physical_element_comparator>
1230  auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1231  {
1232  d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1233  d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1234 
1235  return less_equivalent_comparator{strong_index_comparator_adapter{
1236  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1237  nullate,
1238  *d_left_table,
1239  *d_right_table,
1240  d_left_table->dremel_device_views(),
1241  d_right_table->dremel_device_views(),
1242  d_left_table->depths(),
1243  d_left_table->column_order(),
1244  d_left_table->null_precedence(),
1245  comparator}}};
1246  }
1247 
1248  private:
1249  std::shared_ptr<preprocessed_table> d_left_table;
1250  std::shared_ptr<preprocessed_table> d_right_table;
1251 };
1252 
1253 } // namespace lexicographic
1254 
1255 namespace hash {
1256 class row_hasher;
1257 } // namespace hash
1258 
1259 namespace equality {
1260 
1276  template <typename Element>
1277  __device__ constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
1278  {
1279  return lhs == rhs;
1280  }
1281 };
1282 
1295  template <typename Element, CUDF_ENABLE_IF(not std::is_floating_point_v<Element>)>
1296  __device__ constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
1297  {
1298  return lhs == rhs;
1299  }
1300 
1310  template <typename Element, CUDF_ENABLE_IF(std::is_floating_point_v<Element>)>
1311  __device__ constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
1312  {
1313  return isnan(lhs) and isnan(rhs) ? true : lhs == rhs;
1314  }
1315 };
1316 
1338 template <bool has_nested_columns,
1339  typename Nullate,
1340  typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1342  friend class self_comparator;
1343  friend class two_table_comparator;
1344 
1345  public:
1354  __device__ constexpr bool operator()(size_type const lhs_index,
1355  size_type const rhs_index) const noexcept
1356  {
1357  auto equal_elements = [=](column_device_view l, column_device_view r) {
1358  return cudf::type_dispatcher(
1359  l.type(),
1360  element_comparator{check_nulls, l, r, nulls_are_equal, comparator},
1361  lhs_index,
1362  rhs_index);
1363  };
1364 
1365  return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements);
1366  }
1367 
1368  private:
1379  device_row_comparator(Nullate check_nulls,
1380  table_device_view lhs,
1381  table_device_view rhs,
1382  null_equality nulls_are_equal = null_equality::EQUAL,
1383  PhysicalEqualityComparator comparator = {}) noexcept
1384  : lhs{lhs},
1385  rhs{rhs},
1386  check_nulls{check_nulls},
1387  nulls_are_equal{nulls_are_equal},
1388  comparator{comparator}
1389  {
1390  }
1391 
1395  class element_comparator {
1396  public:
1409  __device__ element_comparator(Nullate check_nulls,
1410  column_device_view lhs,
1411  column_device_view rhs,
1412  null_equality nulls_are_equal = null_equality::EQUAL,
1413  PhysicalEqualityComparator comparator = {}) noexcept
1414  : lhs{lhs},
1415  rhs{rhs},
1416  check_nulls{check_nulls},
1417  nulls_are_equal{nulls_are_equal},
1418  comparator{comparator}
1419  {
1420  }
1421 
1430  template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1431  __device__ bool operator()(size_type const lhs_element_index,
1432  size_type const rhs_element_index) const noexcept
1433  {
1434  if (check_nulls) {
1435  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
1436  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
1437  if (lhs_is_null and rhs_is_null) {
1438  return nulls_are_equal == null_equality::EQUAL;
1439  } else if (lhs_is_null != rhs_is_null) {
1440  return false;
1441  }
1442  }
1443 
1444  return comparator(lhs.element<Element>(lhs_element_index),
1445  rhs.element<Element>(rhs_element_index));
1446  }
1447 
1448  template <typename Element,
1449  CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>() and
1450  (not has_nested_columns or not cudf::is_nested<Element>())),
1451  typename... Args>
1452  __device__ bool operator()(Args...)
1453  {
1454  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
1455  }
1456 
1457  template <typename Element, CUDF_ENABLE_IF(has_nested_columns and cudf::is_nested<Element>())>
1458  __device__ bool operator()(size_type const lhs_element_index,
1459  size_type const rhs_element_index) const noexcept
1460  {
1461  column_device_view lcol = lhs.slice(lhs_element_index, 1);
1462  column_device_view rcol = rhs.slice(rhs_element_index, 1);
1463  while (lcol.type().id() == type_id::STRUCT || lcol.type().id() == type_id::LIST) {
1464  if (check_nulls) {
1465  auto lvalid = detail::make_validity_iterator<true>(lcol);
1466  auto rvalid = detail::make_validity_iterator<true>(rcol);
1467  if (nulls_are_equal == null_equality::UNEQUAL) {
1468  if (thrust::any_of(
1469  thrust::seq, lvalid, lvalid + lcol.size(), thrust::logical_not<bool>()) or
1470  thrust::any_of(
1471  thrust::seq, rvalid, rvalid + rcol.size(), thrust::logical_not<bool>())) {
1472  return false;
1473  }
1474  } else {
1475  if (not thrust::equal(thrust::seq, lvalid, lvalid + lcol.size(), rvalid)) {
1476  return false;
1477  }
1478  }
1479  }
1480  if (lcol.type().id() == type_id::STRUCT) {
1481  if (lcol.num_child_columns() == 0) { return true; }
1482  // Non-empty structs are assumed to be decomposed and contain only one child
1483  lcol = detail::structs_column_device_view(lcol).get_sliced_child(0);
1484  rcol = detail::structs_column_device_view(rcol).get_sliced_child(0);
1485  } else if (lcol.type().id() == type_id::LIST) {
1486  auto l_list_col = detail::lists_column_device_view(lcol);
1487  auto r_list_col = detail::lists_column_device_view(rcol);
1488 
1489  auto lsizes = make_list_size_iterator(l_list_col);
1490  auto rsizes = make_list_size_iterator(r_list_col);
1491  if (not thrust::equal(thrust::seq, lsizes, lsizes + lcol.size(), rsizes)) {
1492  return false;
1493  }
1494 
1495  lcol = l_list_col.get_sliced_child();
1496  rcol = r_list_col.get_sliced_child();
1497  if (lcol.size() != rcol.size()) { return false; }
1498  }
1499  }
1500 
1501  auto comp = column_comparator{
1502  element_comparator{check_nulls, lcol, rcol, nulls_are_equal, comparator}, lcol.size()};
1503  return type_dispatcher<dispatch_void_if_nested>(lcol.type(), comp);
1504  }
1505 
1506  private:
1514  struct column_comparator {
1515  element_comparator const comp;
1516  size_type const size;
1517 
1523  template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1524  __device__ bool operator()() const noexcept
1525  {
1526  return thrust::all_of(thrust::seq,
1527  thrust::make_counting_iterator(0),
1528  thrust::make_counting_iterator(0) + size,
1529  [=](auto i) { return comp.template operator()<Element>(i, i); });
1530  }
1531 
1532  template <typename Element,
1533  CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>()),
1534  typename... Args>
1535  __device__ bool operator()(Args...) const noexcept
1536  {
1537  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
1538  }
1539  };
1540 
1541  column_device_view const lhs;
1542  column_device_view const rhs;
1543  Nullate const check_nulls;
1544  null_equality const nulls_are_equal;
1545  PhysicalEqualityComparator const comparator;
1546  };
1547 
1548  table_device_view const lhs;
1549  table_device_view const rhs;
1550  Nullate const check_nulls;
1551  null_equality const nulls_are_equal;
1552  PhysicalEqualityComparator const comparator;
1553 };
1554 
1572  static std::shared_ptr<preprocessed_table> create(table_view const& table,
1573  rmm::cuda_stream_view stream);
1574 
1575  private:
1576  friend class self_comparator;
1577  friend class two_table_comparator;
1578  friend class hash::row_hasher;
1579 
1580  using table_device_view_owner =
1581  std::invoke_result_t<decltype(table_device_view::create), table_view, rmm::cuda_stream_view>;
1582 
1583  preprocessed_table(table_device_view_owner&& table,
1584  std::vector<rmm::device_buffer>&& null_buffers,
1585  std::vector<std::unique_ptr<column>>&& tmp_columns)
1586  : _t(std::move(table)),
1587  _null_buffers(std::move(null_buffers)),
1588  _tmp_columns(std::move(tmp_columns))
1589  {
1590  }
1591 
1597  operator table_device_view() { return *_t; }
1598 
1599  table_device_view_owner _t;
1600  std::vector<rmm::device_buffer> _null_buffers;
1601  std::vector<std::unique_ptr<column>> _tmp_columns;
1602 };
1603 
1609  public:
1619  : d_t(preprocessed_table::create(t, stream))
1620  {
1621  }
1622 
1632  self_comparator(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
1633 
1659  template <bool has_nested_columns,
1660  typename Nullate,
1661  typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1662  auto equal_to(Nullate nullate = {},
1663  null_equality nulls_are_equal = null_equality::EQUAL,
1664  PhysicalEqualityComparator comparator = {}) const noexcept
1665  {
1666  return device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>{
1667  nullate, *d_t, *d_t, nulls_are_equal, comparator};
1668  }
1669 
1670  private:
1671  std::shared_ptr<preprocessed_table> d_t;
1672 };
1673 
1674 // @cond
1675 template <typename Comparator>
1676 struct strong_index_comparator_adapter {
1677  strong_index_comparator_adapter(Comparator const& comparator) : comparator{comparator} {}
1678 
1679  __device__ constexpr bool operator()(lhs_index_type const lhs_index,
1680  rhs_index_type const rhs_index) const noexcept
1681  {
1682  return comparator(static_cast<cudf::size_type>(lhs_index),
1683  static_cast<cudf::size_type>(rhs_index));
1684  }
1685 
1686  __device__ constexpr bool operator()(rhs_index_type const rhs_index,
1687  lhs_index_type const lhs_index) const noexcept
1688  {
1689  return this->operator()(lhs_index, rhs_index);
1690  }
1691 
1692  Comparator const comparator;
1693 };
1694 // @endcond
1695 
1710  public:
1724  table_view const& right,
1725  rmm::cuda_stream_view stream);
1726 
1737  two_table_comparator(std::shared_ptr<preprocessed_table> left,
1738  std::shared_ptr<preprocessed_table> right)
1739  : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1740  {
1741  }
1742 
1773  template <bool has_nested_columns,
1774  typename Nullate,
1775  typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1776  auto equal_to(Nullate nullate = {},
1777  null_equality nulls_are_equal = null_equality::EQUAL,
1778  PhysicalEqualityComparator comparator = {}) const noexcept
1779  {
1780  return strong_index_comparator_adapter{
1781  device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>(
1782  nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)};
1783  }
1784 
1785  private:
1786  std::shared_ptr<preprocessed_table> d_left_table;
1787  std::shared_ptr<preprocessed_table> d_right_table;
1788 };
1789 
1790 } // namespace equality
1791 
1792 namespace hash {
1793 
1800 template <template <typename> class hash_function, typename Nullate>
1802  public:
1810  __device__ element_hasher(
1811  Nullate nulls,
1812  uint32_t seed = DEFAULT_HASH_SEED,
1813  hash_value_type null_hash = cuda::std::numeric_limits<hash_value_type>::max()) noexcept
1814  : _check_nulls(nulls), _seed(seed), _null_hash(null_hash)
1815  {
1816  }
1817 
1826  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
1827  __device__ hash_value_type operator()(column_device_view const& col,
1828  size_type row_index) const noexcept
1829  {
1830  if (_check_nulls && col.is_null(row_index)) { return _null_hash; }
1831  return hash_function<T>{_seed}(col.element<T>(row_index));
1832  }
1833 
1842  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
1843  __device__ hash_value_type operator()(column_device_view const& col,
1844  size_type row_index) const noexcept
1845  {
1846  CUDF_UNREACHABLE("Unsupported type in hash.");
1847  }
1848 
1849  Nullate _check_nulls;
1850  uint32_t _seed;
1852 };
1853 
1860 template <template <typename> class hash_function, typename Nullate>
1862  friend class row_hasher;
1863 
1864  public:
1871  __device__ auto operator()(size_type row_index) const noexcept
1872  {
1873  auto it = thrust::make_transform_iterator(_table.begin(), [=](auto const& column) {
1874  return cudf::type_dispatcher<dispatch_storage_type>(
1875  column.type(),
1876  element_hasher_adapter<hash_function>{_check_nulls, _seed},
1877  column,
1878  row_index);
1879  });
1880 
1881  // Hash each element and combine all the hash values together
1882  return detail::accumulate(it, it + _table.num_columns(), _seed, [](auto hash, auto h) {
1883  return cudf::hashing::detail::hash_combine(hash, h);
1884  });
1885  }
1886 
1887  private:
1895  template <template <typename> class hash_fn>
1896  class element_hasher_adapter {
1897  static constexpr hash_value_type NULL_HASH = cuda::std::numeric_limits<hash_value_type>::max();
1898  static constexpr hash_value_type NON_NULL_HASH = 0;
1899 
1900  public:
1901  __device__ element_hasher_adapter(Nullate check_nulls, uint32_t seed) noexcept
1902  : _element_hasher(check_nulls, seed), _check_nulls(check_nulls)
1903  {
1904  }
1905 
1906  template <typename T, CUDF_ENABLE_IF(not cudf::is_nested<T>())>
1907  __device__ hash_value_type operator()(column_device_view const& col,
1908  size_type row_index) const noexcept
1909  {
1910  return _element_hasher.template operator()<T>(col, row_index);
1911  }
1912 
1913  template <typename T, CUDF_ENABLE_IF(cudf::is_nested<T>())>
1914  __device__ hash_value_type operator()(column_device_view const& col,
1915  size_type row_index) const noexcept
1916  {
1917  auto hash = hash_value_type{0};
1918  column_device_view curr_col = col.slice(row_index, 1);
1919  while (curr_col.type().id() == type_id::STRUCT || curr_col.type().id() == type_id::LIST) {
1920  if (_check_nulls) {
1921  auto validity_it = detail::make_validity_iterator<true>(curr_col);
1922  hash = detail::accumulate(
1923  validity_it, validity_it + curr_col.size(), hash, [](auto hash, auto is_valid) {
1924  return cudf::hashing::detail::hash_combine(hash,
1925  is_valid ? NON_NULL_HASH : NULL_HASH);
1926  });
1927  }
1928  if (curr_col.type().id() == type_id::STRUCT) {
1929  if (curr_col.num_child_columns() == 0) { return hash; }
1930  // Non-empty structs are assumed to be decomposed and contain only one child
1931  curr_col = detail::structs_column_device_view(curr_col).get_sliced_child(0);
1932  } else if (curr_col.type().id() == type_id::LIST) {
1933  auto list_col = detail::lists_column_device_view(curr_col);
1934  auto list_sizes = make_list_size_iterator(list_col);
1935  hash = detail::accumulate(
1936  list_sizes, list_sizes + list_col.size(), hash, [](auto hash, auto size) {
1937  return cudf::hashing::detail::hash_combine(hash, hash_fn<size_type>{}(size));
1938  });
1939  curr_col = list_col.get_sliced_child();
1940  }
1941  }
1942  for (int i = 0; i < curr_col.size(); ++i) {
1943  hash = cudf::hashing::detail::hash_combine(
1944  hash,
1945  type_dispatcher<dispatch_void_if_nested>(curr_col.type(), _element_hasher, curr_col, i));
1946  }
1947  return hash;
1948  }
1949 
1950  element_hasher<hash_fn, Nullate> const _element_hasher;
1951  Nullate const _check_nulls;
1952  };
1953 
1954  CUDF_HOST_DEVICE device_row_hasher(Nullate check_nulls,
1955  table_device_view t,
1956  uint32_t seed = DEFAULT_HASH_SEED) noexcept
1957  : _check_nulls{check_nulls}, _table{t}, _seed(seed)
1958  {
1959  }
1960 
1961  Nullate const _check_nulls;
1962  table_device_view const _table;
1963  uint32_t const _seed;
1964 };
1965 
1966 // Inject row::equality::preprocessed_table into the row::hash namespace
1967 // As a result, row::equality::preprocessed_table and row::hash::preprocessed table are the same
1968 // type and are interchangeable.
1969 using preprocessed_table = row::equality::preprocessed_table;
1970 
1975 class row_hasher {
1976  public:
1985  : d_t(preprocessed_table::create(t, stream))
1986  {
1987  }
1988 
1998  row_hasher(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
1999 
2012  template <template <typename> class hash_function = cudf::hashing::detail::default_hash,
2013  template <template <typename> class, typename>
2014  class DeviceRowHasher = device_row_hasher,
2015  typename Nullate>
2016  DeviceRowHasher<hash_function, Nullate> device_hasher(Nullate nullate = {},
2017  uint32_t seed = DEFAULT_HASH_SEED) const
2018  {
2019  return DeviceRowHasher<hash_function, Nullate>(nullate, *d_t, seed);
2020  }
2021 
2022  private:
2023  std::shared_ptr<preprocessed_table> d_t;
2024 };
2025 
2026 } // namespace hash
2027 
2028 } // namespace row
2029 
2030 } // namespace experimental
2031 } // namespace CUDF_EXPORT cudf
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
T element(size_type element_index) const noexcept
Returns reference to element at the specified index.
CUDF_HOST_DEVICE column_device_view slice(size_type offset, size_type size) const noexcept
Get a new column_device_view which is a slice of this column.
CUDF_HOST_DEVICE size_type num_child_columns() const noexcept
Returns the number of child columns.
const_iterator< T > begin() const
Return an iterator to the first element of the column.
const_iterator< T > end() const
Returns an iterator to the element following the last element of the column.
A container of nullable device data as a column of elements.
Definition: column.hpp:47
constexpr CUDF_HOST_DEVICE type_id id() const noexcept
Returns the type identifier.
Definition: types.hpp:287
CUDF_HOST_DEVICE data_type type() const noexcept
Returns the element type.
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
CUDF_HOST_DEVICE bool nullable() const noexcept
Indicates whether the column can contain null elements, i.e., if it has an allocated bitmask.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child() const
Fetches the child column of the underlying list column with offset and size applied.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child(size_type idx) const
Fetches the child column of the underlying struct column.
constexpr bool operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table is equal to the row at rhs_index in the rhs tabl...
Comparator for performing equality comparisons between two rows of the same table.
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing equality comparisons between two rows of the same table.
self_comparator(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows of the same table.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Get the comparison operator to use on the device.
An owning object that can be used to equality compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing equality comparisons between two rows from two tables.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows from two tables.
Computes the hash value of a row in the given table.
auto operator()(size_type row_index) const noexcept
Return the hash value of a row in the given table.
Computes the hash value of an element in the given column.
element_hasher(Nullate nulls, uint32_t seed=DEFAULT_HASH_SEED, hash_value_type null_hash=cuda::std::numeric_limits< hash_value_type >::max()) noexcept
Constructs an element_hasher object.
hash_value_type _null_hash
Hash value to use for null elements.
Computes the hash value of a row in the given table.
DeviceRowHasher< hash_function, Nullate > device_hasher(Nullate nullate={}, uint32_t seed=DEFAULT_HASH_SEED) const
Get the hash operator to use on the device.
row_hasher(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for hashing the rows of a table.
row_hasher(std::shared_ptr< preprocessed_table > t)
Construct an owning object for hashing the rows of a table from an existing preprocessed_table.
Performs a relational comparison between two elements in two columns.
cuda::std::pair< weak_ordering, int > operator()(size_type lhs_element_index, size_type rhs_element_index)
Compares two list-type columns.
cuda::std::pair< weak_ordering, int > operator()(size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
Performs a relational comparison between the specified elements.
cuda::std::pair< weak_ordering, int > operator()(size_type const, size_type const) const noexcept
Throws run-time error when columns types cannot be compared or if this class is instantiated with has...
element_comparator(Nullate check_nulls, column_device_view lhs, column_device_view rhs, null_order null_precedence=null_order::BEFORE, int depth=0, PhysicalElementComparator comparator={}, optional_dremel_view l_dremel_device_view={}, optional_dremel_view r_dremel_device_view={})
Construct type-dispatched function object for performing a relational comparison between two elements...
Computes the lexicographic comparison between 2 rows.
constexpr weak_ordering operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less,...
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, cuda::std::optional< device_span< order const >> column_order=cuda::std::nullopt, cuda::std::optional< device_span< null_order const >> null_precedence=cuda::std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables....
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, device_span< detail::dremel_device_view const > l_dremel_device_views, device_span< detail::dremel_device_view const > r_dremel_device_views, cuda::std::optional< device_span< int const >> depth=cuda::std::nullopt, cuda::std::optional< device_span< order const >> column_order=cuda::std::nullopt, cuda::std::optional< device_span< null_order const >> null_precedence=cuda::std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables.
An owning object that can be used to lexicographically compare two rows of the same table.
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
self_comparator(table_view const &t, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between two rows of the same tab...
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
An owning object that can be used to lexicographically compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between rows of two different ta...
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
Table device view that is usable in device memory.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
Column device view class definitions.
uint32_t hash_value_type
Type of hash value.
Definition: hashing.hpp:29
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
std::unique_ptr< cudf::column > is_valid(cudf::column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a column of type_id::BOOL8 elements where for every element in input true indicates the value...
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:178
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:159
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:151
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
#define CUDF_ENABLE_IF(...)
Convenience macro for SFINAE as an unnamed template parameter.
Definition: traits.hpp:48
cuDF interfaces
Definition: host_udf.hpp:39
weak_ordering
Result type of the element_relational_comparator function object.
@ EQUIVALENT
Indicates a is ordered neither before nor after b
auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Compare the nulls according to null order.
CUDF_HOST_DEVICE auto make_list_size_iterator(detail::lists_column_device_view const &c)
Makes an iterator that returns size of the list by row index.
Column APIs for sort and rank.
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:346
A map from cudf::type_id to cudf type that excludes LIST and STRUCT types.
std::conditional_t< t==type_id::STRUCT or t==type_id::LIST, void, id_to_type< t > > type
The type to dispatch to if the type is nested.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparison of non-floating point values.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparisons.
Preprocessed table for use with row equality comparison or row hashing.
static std::shared_ptr< preprocessed_table > create(table_view const &table, rmm::cuda_stream_view stream)
Factory to construct preprocessed_table for use with row equality comparison or row hashing.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS m...
less_comparator(Comparator const &comparator)
Constructs a less_comparator.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS o...
less_equivalent_comparator(Comparator const &comparator)
Constructs a less_equivalent_comparator.
Computes a weak ordering of two values with special sorting behavior.
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparisons.
Preprocessed table for use with lexicographical comparison.
std::invoke_result_t< decltype(table_device_view::create), table_view, rmm::cuda_stream_view > table_device_view_owner
Type of table device view owner for the preprocessed table.
static std::pair< std::shared_ptr< preprocessed_table >, std::shared_ptr< preprocessed_table > > create(table_view const &lhs, table_view const &rhs, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess tables for use with lexicographical comparison.
static std::shared_ptr< preprocessed_table > create(table_view const &table, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess table for use with lexicographical comparison.
Relational comparator functor that compares physical values rather than logical elements like lists,...
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparison of non-floating point values.
A counting iterator that uses strongly typed indices bound to tables.
thrust::iterator_adaptor< strong_index_iterator< Index >, Index > super_t
The base class.
constexpr strong_index_iterator(Underlying n)
Constructs a strong index iterator.
C++20 std::span with reduced feature set.
Definition: span.hpp:194
Indicates the presence of nulls at compile-time or runtime.
Table device view class definitions.
bool has_nested_columns(table_view const &table)
Determine if any nested columns exist in a given table.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:32