20 #include <cudf/detail/iterator.cuh>
21 #include <cudf/detail/utilities/algorithm.cuh>
22 #include <cudf/detail/utilities/assert.cuh>
23 #include <cudf/hashing/detail/default_hash.cuh>
24 #include <cudf/hashing/detail/hashing.hpp>
25 #include <cudf/lists/detail/dremel.hpp>
26 #include <cudf/lists/list_device_view.cuh>
27 #include <cudf/lists/lists_column_device_view.cuh>
29 #include <cudf/structs/structs_column_device_view.cuh>
30 #include <cudf/table/row_operators.cuh>
36 #include <cuda/std/tuple>
37 #include <cuda/std/utility>
38 #include <thrust/detail/use_default.h>
39 #include <thrust/equal.h>
40 #include <thrust/execution_policy.h>
41 #include <thrust/functional.h>
42 #include <thrust/iterator/counting_iterator.h>
43 #include <thrust/iterator/iterator_adaptor.h>
44 #include <thrust/iterator/iterator_categories.h>
45 #include <thrust/iterator/iterator_facade.h>
46 #include <thrust/iterator/transform_iterator.h>
47 #include <thrust/logical.h>
48 #include <thrust/swap.h>
49 #include <thrust/transform_reduce.h>
54 #include <type_traits>
57 namespace CUDF_EXPORT
cudf {
59 namespace experimental {
76 template <cudf::type_
id t>
79 using type = std::conditional_t<t == type_id::STRUCT or t == type_id::LIST, void, id_to_type<t>>;
100 template <
typename Index,
typename Underlying = std::underlying_type_t<Index>>
104 thrust::random_access_traversal_tag,
108 thrust::iterator_adaptor<strong_index_iterator<Index>, Index>;
117 friend class thrust::iterator_core_access;
120 __device__ constexpr
void increment() { ++begin; }
121 __device__ constexpr
void decrement() { --begin; }
123 __device__ constexpr
void advance(Underlying n) { begin += n; }
125 __device__ constexpr
bool equal(strong_index_iterator<Index>
const& other)
const noexcept
127 return begin == other.begin;
130 __device__ constexpr Index dereference() const noexcept {
return static_cast<Index
>(begin); }
132 __device__ constexpr Underlying distance_to(
133 strong_index_iterator<Index>
const& other)
const noexcept
135 return other.begin - begin;
144 using lhs_iterator = strong_index_iterator<lhs_index_type>;
149 using rhs_iterator = strong_index_iterator<rhs_index_type>;
151 namespace lexicographic {
168 template <
typename Element>
171 return detail::compare_elements(lhs, rhs);
188 template <
typename Element, CUDF_ENABLE_IF(not std::is_
floating_po
int_v<Element>)>
191 return detail::compare_elements(lhs, rhs);
201 template <
typename Element, CUDF_ENABLE_IF(std::is_
floating_po
int_v<Element>)>
205 return isnan(rhs) ? weak_ordering::EQUIVALENT : weak_ordering::GREATER;
206 }
else if (isnan(rhs)) {
207 return weak_ordering::LESS;
210 return detail::compare_elements(lhs, rhs);
214 using optional_dremel_view = cuda::std::optional<detail::dremel_device_view const>;
263 template <
bool has_nested_columns,
265 typename PhysicalElementComparator = sorting_physical_element_comparator>
298 PhysicalElementComparator comparator = {}) noexcept
301 _l_dremel(l_dremel_device_views),
302 _r_dremel(r_dremel_device_views),
303 _check_nulls{check_nulls},
305 _column_order{column_order},
306 _null_precedence{null_precedence},
307 _comparator{comparator}
329 template <
bool nested_disable = not has_nested_columns, CUDF_ENABLE_IF(nested_disable)>
336 PhysicalElementComparator comparator = {}) noexcept
341 _check_nulls{check_nulls},
343 _column_order{column_order},
344 _null_precedence{null_precedence},
345 _comparator{comparator}
373 null_order null_precedence = null_order::BEFORE,
375 PhysicalElementComparator comparator = {},
376 optional_dremel_view l_dremel_device_view = {},
377 optional_dremel_view r_dremel_device_view = {})
380 _check_nulls{check_nulls},
381 _null_precedence{null_precedence},
383 _l_dremel_device_view{l_dremel_device_view},
384 _r_dremel_device_view{r_dremel_device_view},
385 _comparator{comparator}
397 template <
typename Element,
398 CUDF_ENABLE_IF(cudf::is_relationally_comparable<Element, Element>())>
403 bool const lhs_is_null{_lhs.is_null(lhs_element_index)};
404 bool const rhs_is_null{_rhs.is_null(rhs_element_index)};
406 if (lhs_is_null or rhs_is_null) {
407 return cuda::std::pair(
null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth);
411 return cuda::std::pair(_comparator(_lhs.element<Element>(lhs_element_index),
412 _rhs.element<Element>(rhs_element_index)),
413 std::numeric_limits<int>::max());
423 template <
typename Element,
424 CUDF_ENABLE_IF(not cudf::is_relationally_comparable<Element, Element>() and
425 (not has_nested_columns or not cudf::is_nested<Element>()))>
429 CUDF_UNREACHABLE(
"Attempted to compare elements of uncomparable types.");
440 template <
typename Element,
441 CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::struct_view>)>
448 while (lcol.
type().
id() == type_id::STRUCT) {
449 bool const lhs_is_null{lcol.
is_null(lhs_element_index)};
450 bool const rhs_is_null{rcol.
is_null(rhs_element_index)};
452 if (lhs_is_null or rhs_is_null) {
454 return cuda::std::pair(state, depth);
458 return cuda::std::pair(weak_ordering::EQUIVALENT, std::numeric_limits<int>::max());
467 return cudf::type_dispatcher<dispatch_void_if_nested>(
482 template <
typename Element,
483 CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::list_view>)>
488 auto const is_l_row_null = _lhs.is_null(lhs_element_index);
489 auto const is_r_row_null = _rhs.is_null(rhs_element_index);
490 if (is_l_row_null || is_r_row_null) {
491 return cuda::std::pair(
null_compare(is_l_row_null, is_r_row_null, _null_precedence),
496 auto const l_max_def_level = _l_dremel_device_view->max_def_level;
497 auto const r_max_def_level = _r_dremel_device_view->max_def_level;
498 auto const l_def_levels = _l_dremel_device_view->def_levels;
499 auto const r_def_levels = _r_dremel_device_view->def_levels;
500 auto const l_rep_levels = _l_dremel_device_view->rep_levels;
501 auto const r_rep_levels = _r_dremel_device_view->rep_levels;
508 while (lcol.
type().
id() == type_id::LIST) {
516 auto const l_offsets = _l_dremel_device_view->offsets;
517 auto const r_offsets = _r_dremel_device_view->offsets;
518 auto l_start = l_offsets[lhs_element_index];
519 auto l_end = l_offsets[lhs_element_index + 1];
520 auto r_start = r_offsets[rhs_element_index];
521 auto r_end = r_offsets[rhs_element_index + 1];
531 for (
int l_dremel_index = l_start, r_dremel_index = r_start, element_index = 0;
532 l_dremel_index < l_end and r_dremel_index < r_end;
533 ++l_dremel_index, ++r_dremel_index) {
534 auto const l_rep_level = l_rep_levels[l_dremel_index];
535 auto const r_rep_level = r_rep_levels[r_dremel_index];
538 if (l_rep_level != r_rep_level) {
540 return l_rep_level < r_rep_level ? cuda::std::pair(weak_ordering::LESS, _depth)
541 : cuda::std::pair(weak_ordering::GREATER, _depth);
545 auto const l_def_level = l_def_levels[l_dremel_index];
546 auto const r_def_level = r_def_levels[r_dremel_index];
549 if (l_def_level < l_max_def_level || r_def_level < r_max_def_level) {
556 if ((lcol.
nullable() and l_def_levels[l_dremel_index] == l_max_def_level - 1) or
557 (rcol.
nullable() and r_def_levels[r_dremel_index] == r_max_def_level - 1)) {
560 if (l_def_level == r_def_level) {
continue; }
563 return l_def_level < r_def_level ? cuda::std::pair(weak_ordering::LESS, _depth)
564 : cuda::std::pair(weak_ordering::GREATER, _depth);
569 int last_null_depth = _depth;
570 cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher<dispatch_void_if_nested>(
571 lcol.
type(), comparator, element_index, element_index);
572 if (state != weak_ordering::EQUIVALENT) {
return cuda::std::pair(state, _depth); }
583 return cuda::std::pair(detail::compare_elements(l_end - l_start, r_end - r_start), _depth);
589 Nullate
const _check_nulls;
592 optional_dremel_view _l_dremel_device_view;
593 optional_dremel_view _r_dremel_device_view;
594 PhysicalElementComparator
const _comparator;
608 size_type const rhs_index)
const noexcept
610 int last_null_depth = std::numeric_limits<int>::max();
612 for (
size_type i = 0; i < _lhs.num_columns(); ++i) {
613 if (_lhs.column(i).type().id() == type_id::LIST) { ++list_column_index; }
615 int const depth = _depth.has_value() ? (*_depth)[i] : 0;
616 if (depth > last_null_depth) {
continue; }
618 bool const ascending =
619 _column_order.has_value() ? (*_column_order)[i] == order::ASCENDING :
true;
622 _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE;
627 auto const [l_dremel_i, r_dremel_i] =
628 _lhs.column(i).type().id() == type_id::LIST
629 ? std::make_tuple(optional_dremel_view(_l_dremel[list_column_index]),
630 optional_dremel_view(_r_dremel[list_column_index]))
631 : std::make_tuple(optional_dremel_view{}, optional_dremel_view{});
643 cuda::std::tie(state, last_null_depth) =
646 if (state == weak_ordering::EQUIVALENT) {
continue; }
650 : (state == weak_ordering::GREATER ? weak_ordering::LESS : weak_ordering::GREATER);
652 return weak_ordering::EQUIVALENT;
660 Nullate
const _check_nulls;
661 std::optional<device_span<int const>>
const _depth;
662 std::optional<device_span<order const>>
const _column_order;
663 std::optional<device_span<null_order const>>
const _null_precedence;
664 PhysicalElementComparator
const _comparator;
678 struct weak_ordering_comparator_impl {
679 static_assert(not((weak_ordering::EQUIVALENT == values) && ...),
680 "weak_ordering_comparator should not be used for pure equality comparisons. The "
681 "`row_equality_comparator` should be used instead");
683 template <
typename LhsType,
typename RhsType>
684 __device__ constexpr
bool operator()(LhsType
const lhs_index,
685 RhsType
const rhs_index)
const noexcept
687 weak_ordering const result = comparator(lhs_index, rhs_index);
688 return ((result == values) || ...);
690 Comparator
const comparator;
699 template <
typename Comparator>
700 struct less_comparator : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS> {
707 : weak_ordering_comparator_impl<Comparator,
weak_ordering::LESS>{comparator}
719 template <
typename Comparator>
721 : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS, weak_ordering::EQUIVALENT> {
791 static std::pair<std::shared_ptr<preprocessed_table>, std::shared_ptr<preprocessed_table>>
create(
821 static std::shared_ptr<preprocessed_table> create(
823 std::vector<int>&& verticalized_col_depths,
824 std::vector<std::unique_ptr<column>>&& transformed_columns,
827 bool has_ranked_children,
860 std::vector<detail::dremel_data>&& dremel_data,
862 std::vector<std::unique_ptr<column>>&& transformed_columns,
863 bool has_ranked_children);
869 std::vector<std::unique_ptr<column>>&& transformed_columns,
870 bool has_ranked_children);
885 [[nodiscard]] std::optional<device_span<order const>> column_order()
const
887 return _column_order.size() ? std::optional<device_span<order const>>(_column_order)
898 [[nodiscard]] std::optional<device_span<null_order const>> null_precedence()
const
900 return _null_precedence.size() ? std::optional<device_span<null_order const>>(_null_precedence)
912 [[nodiscard]] std::optional<device_span<int const>> depths()
const
914 return _depths.size() ? std::optional<device_span<int const>>(_depths) : std::nullopt;
919 if (_dremel_device_views.has_value()) {
926 template <
typename PhysicalElementComparator>
927 void check_physical_element_comparator()
929 if constexpr (!std::is_same_v<PhysicalElementComparator, sorting_physical_element_comparator>) {
931 "The input table has nested type children and they were transformed using a "
932 "different type of physical element comparator.");
937 table_device_view_owner
const _t;
943 std::optional<std::vector<detail::dremel_data>> _dremel_data;
944 std::optional<rmm::device_uvector<detail::dremel_device_view>> _dremel_device_views;
948 std::vector<std::unique_ptr<column>> _transformed_columns;
952 bool const _has_ranked_children;
988 : d_t{preprocessed_table::create(t, column_order, null_precedence, stream)}
1031 template <
bool has_nested_columns,
1034 auto less(Nullate
nullate = {}, PhysicalElementComparator comparator = {})
const
1036 d_t->check_physical_element_comparator<PhysicalElementComparator>();
1038 return less_comparator{
1039 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1043 d_t->dremel_device_views(),
1044 d_t->dremel_device_views(),
1046 d_t->column_order(),
1047 d_t->null_precedence(),
1052 template <
bool has_nested_columns,
1054 typename PhysicalElementComparator = sorting_physical_element_comparator>
1057 d_t->check_physical_element_comparator<PhysicalElementComparator>();
1059 return less_equivalent_comparator{
1060 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1064 d_t->dremel_device_views(),
1065 d_t->dremel_device_views(),
1067 d_t->column_order(),
1068 d_t->null_precedence(),
1073 std::shared_ptr<preprocessed_table> d_t;
1077 template <
typename Comparator>
1078 struct strong_index_comparator_adapter {
1079 strong_index_comparator_adapter(Comparator
const& comparator) : comparator{comparator} {}
1081 __device__ constexpr
weak_ordering operator()(lhs_index_type
const lhs_index,
1082 rhs_index_type
const rhs_index)
const noexcept
1088 __device__ constexpr
weak_ordering operator()(rhs_index_type
const rhs_index,
1089 lhs_index_type
const lhs_index)
const noexcept
1091 auto const left_right_ordering =
1095 if (left_right_ordering == weak_ordering::LESS) {
1096 return weak_ordering::GREATER;
1097 }
else if (left_right_ordering == weak_ordering::GREATER) {
1098 return weak_ordering::LESS;
1100 return weak_ordering::EQUIVALENT;
1103 Comparator
const comparator;
1162 std::shared_ptr<preprocessed_table> right)
1163 : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1203 template <
bool has_nested_columns,
1206 auto less(Nullate
nullate = {}, PhysicalElementComparator comparator = {})
const
1208 d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1209 d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1211 return less_comparator{strong_index_comparator_adapter{
1212 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1216 d_left_table->dremel_device_views(),
1217 d_right_table->dremel_device_views(),
1218 d_left_table->depths(),
1219 d_left_table->column_order(),
1220 d_left_table->null_precedence(),
1227 typename PhysicalElementComparator = sorting_physical_element_comparator>
1230 d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1231 d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1233 return less_equivalent_comparator{strong_index_comparator_adapter{
1234 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1238 d_left_table->dremel_device_views(),
1239 d_right_table->dremel_device_views(),
1240 d_left_table->depths(),
1241 d_left_table->column_order(),
1242 d_left_table->null_precedence(),
1247 std::shared_ptr<preprocessed_table> d_left_table;
1248 std::shared_ptr<preprocessed_table> d_right_table;
1257 namespace equality {
1274 template <
typename Element>
1275 __device__ constexpr
bool operator()(Element
const lhs, Element
const rhs)
const noexcept
1293 template <
typename Element, CUDF_ENABLE_IF(not std::is_
floating_po
int_v<Element>)>
1294 __device__ constexpr
bool operator()(Element
const lhs, Element
const rhs)
const noexcept
1308 template <
typename Element, CUDF_ENABLE_IF(std::is_
floating_po
int_v<Element>)>
1309 __device__ constexpr
bool operator()(Element
const lhs, Element
const rhs)
const noexcept
1311 return isnan(lhs) and isnan(rhs) ? true : lhs == rhs;
1336 template <
bool has_nested_columns,
1338 typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1353 size_type const rhs_index)
const noexcept
1358 element_comparator{check_nulls, l, r, nulls_are_equal, comparator},
1363 return thrust::equal(thrust::seq, lhs.
begin(), lhs.
end(), rhs.
begin(), equal_elements);
1381 PhysicalEqualityComparator comparator = {}) noexcept
1384 check_nulls{check_nulls},
1385 nulls_are_equal{nulls_are_equal},
1386 comparator{comparator}
1393 class element_comparator {
1407 __device__ element_comparator(Nullate check_nulls,
1408 column_device_view lhs,
1409 column_device_view rhs,
1411 PhysicalEqualityComparator comparator = {}) noexcept
1414 check_nulls{check_nulls},
1415 nulls_are_equal{nulls_are_equal},
1416 comparator{comparator}
1428 template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1429 __device__
bool operator()(
size_type const lhs_element_index,
1430 size_type const rhs_element_index)
const noexcept
1433 bool const lhs_is_null{lhs.is_null(lhs_element_index)};
1434 bool const rhs_is_null{rhs.is_null(rhs_element_index)};
1435 if (lhs_is_null and rhs_is_null) {
1436 return nulls_are_equal == null_equality::EQUAL;
1437 }
else if (lhs_is_null != rhs_is_null) {
1442 return comparator(lhs.element<Element>(lhs_element_index),
1443 rhs.element<Element>(rhs_element_index));
1446 template <
typename Element,
1447 CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>() and
1448 (not has_nested_columns or not cudf::is_nested<Element>())),
1450 __device__
bool operator()(Args...)
1452 CUDF_UNREACHABLE(
"Attempted to compare elements of uncomparable types.");
1455 template <typename Element, CUDF_ENABLE_IF(has_nested_columns and cudf::is_nested<Element>())>
1456 __device__
bool operator()(
size_type const lhs_element_index,
1457 size_type const rhs_element_index)
const noexcept
1459 column_device_view lcol = lhs.slice(lhs_element_index, 1);
1460 column_device_view rcol = rhs.slice(rhs_element_index, 1);
1461 while (lcol.type().id() == type_id::STRUCT || lcol.type().id() == type_id::LIST) {
1463 auto lvalid = detail::make_validity_iterator<true>(lcol);
1464 auto rvalid = detail::make_validity_iterator<true>(rcol);
1465 if (nulls_are_equal == null_equality::UNEQUAL) {
1467 thrust::seq, lvalid, lvalid + lcol.size(), thrust::logical_not<bool>()) or
1469 thrust::seq, rvalid, rvalid + rcol.size(), thrust::logical_not<bool>())) {
1473 if (not thrust::equal(thrust::seq, lvalid, lvalid + lcol.size(), rvalid)) {
1478 if (lcol.type().id() == type_id::STRUCT) {
1479 if (lcol.num_child_columns() == 0) {
return true; }
1481 lcol = detail::structs_column_device_view(lcol).get_sliced_child(0);
1482 rcol = detail::structs_column_device_view(rcol).get_sliced_child(0);
1483 }
else if (lcol.type().id() == type_id::LIST) {
1484 auto l_list_col = detail::lists_column_device_view(lcol);
1485 auto r_list_col = detail::lists_column_device_view(rcol);
1489 if (not thrust::equal(thrust::seq, lsizes, lsizes + lcol.size(), rsizes)) {
1493 lcol = l_list_col.get_sliced_child();
1494 rcol = r_list_col.get_sliced_child();
1495 if (lcol.size() != rcol.size()) {
return false; }
1499 auto comp = column_comparator{
1500 element_comparator{check_nulls, lcol, rcol, nulls_are_equal, comparator}, lcol.size()};
1501 return type_dispatcher<dispatch_void_if_nested>(lcol.type(), comp);
1512 struct column_comparator {
1513 element_comparator
const comp;
1521 template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1522 __device__
bool operator()()
const noexcept
1524 return thrust::all_of(thrust::seq,
1525 thrust::make_counting_iterator(0),
1526 thrust::make_counting_iterator(0) + size,
1527 [=](
auto i) {
return comp.template operator()<Element>(i, i); });
1530 template <
typename Element,
1531 CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>()),
1533 __device__
bool operator()(Args...) const noexcept
1535 CUDF_UNREACHABLE(
"Attempted to compare elements of uncomparable types.");
1539 column_device_view
const lhs;
1540 column_device_view
const rhs;
1541 Nullate
const check_nulls;
1543 PhysicalEqualityComparator
const comparator;
1546 table_device_view
const lhs;
1547 table_device_view
const rhs;
1548 Nullate
const check_nulls;
1550 PhysicalEqualityComparator
const comparator;
1578 using table_device_view_owner =
1582 std::vector<rmm::device_buffer>&& null_buffers,
1583 std::vector<std::unique_ptr<column>>&& tmp_columns)
1584 : _t(std::move(
table)),
1585 _null_buffers(std::move(null_buffers)),
1586 _tmp_columns(std::move(tmp_columns))
1597 table_device_view_owner _t;
1598 std::vector<rmm::device_buffer> _null_buffers;
1599 std::vector<std::unique_ptr<column>> _tmp_columns;
1657 template <
bool has_nested_columns,
1662 PhysicalEqualityComparator comparator = {})
const noexcept
1664 return device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>{
1665 nullate, *d_t, *d_t, nulls_are_equal, comparator};
1669 std::shared_ptr<preprocessed_table> d_t;
1673 template <
typename Comparator>
1674 struct strong_index_comparator_adapter {
1675 strong_index_comparator_adapter(Comparator
const& comparator) : comparator{comparator} {}
1677 __device__ constexpr
bool operator()(lhs_index_type
const lhs_index,
1678 rhs_index_type
const rhs_index)
const noexcept
1684 __device__ constexpr
bool operator()(rhs_index_type
const rhs_index,
1685 lhs_index_type
const lhs_index)
const noexcept
1687 return this->operator()(lhs_index, rhs_index);
1690 Comparator
const comparator;
1736 std::shared_ptr<preprocessed_table> right)
1737 : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1771 template <
bool has_nested_columns,
1776 PhysicalEqualityComparator comparator = {})
const noexcept
1778 return strong_index_comparator_adapter{
1779 device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>(
1780 nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)};
1784 std::shared_ptr<preprocessed_table> d_left_table;
1785 std::shared_ptr<preprocessed_table> d_right_table;
1798 template <
template <
typename>
class hash_function,
typename Nullate>
1810 uint32_t seed = DEFAULT_HASH_SEED,
1811 hash_value_type null_hash = std::numeric_limits<hash_value_type>::max()) noexcept
1812 : _check_nulls(nulls), _seed(seed), _null_hash(null_hash)
1824 template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
1828 if (_check_nulls && col.
is_null(row_index)) {
return _null_hash; }
1829 return hash_function<T>{_seed}(col.
element<T>(row_index));
1840 template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
1844 CUDF_UNREACHABLE(
"Unsupported type in hash.");
1858 template <
template <
typename>
class hash_function,
typename Nullate>
1871 auto it = thrust::make_transform_iterator(_table.begin(), [=](
auto const&
column) {
1872 return cudf::type_dispatcher<dispatch_storage_type>(
1874 element_hasher_adapter<hash_function>{_check_nulls, _seed},
1880 return detail::accumulate(it, it + _table.num_columns(), _seed, [](
auto hash,
auto h) {
1881 return cudf::hashing::detail::hash_combine(hash, h);
1893 template <
template <
typename>
class hash_fn>
1894 class element_hasher_adapter {
1895 static constexpr
hash_value_type NULL_HASH = std::numeric_limits<hash_value_type>::max();
1899 __device__ element_hasher_adapter(Nullate check_nulls, uint32_t seed) noexcept
1900 : _element_hasher(check_nulls, seed), _check_nulls(check_nulls)
1904 template <typename T, CUDF_ENABLE_IF(not cudf::is_nested<T>())>
1908 return _element_hasher.template operator()<T>(col, row_index);
1911 template <typename T, CUDF_ENABLE_IF(cudf::is_nested<T>())>
1916 column_device_view curr_col = col.
slice(row_index, 1);
1917 while (curr_col.type().id() == type_id::STRUCT || curr_col.type().id() == type_id::LIST) {
1919 auto validity_it = detail::make_validity_iterator<true>(curr_col);
1920 hash = detail::accumulate(
1921 validity_it, validity_it + curr_col.size(), hash, [](
auto hash,
auto is_valid) {
1922 return cudf::hashing::detail::hash_combine(hash,
1923 is_valid ? NON_NULL_HASH : NULL_HASH);
1926 if (curr_col.type().id() == type_id::STRUCT) {
1927 if (curr_col.num_child_columns() == 0) {
return hash; }
1929 curr_col = detail::structs_column_device_view(curr_col).get_sliced_child(0);
1930 }
else if (curr_col.type().id() == type_id::LIST) {
1931 auto list_col = detail::lists_column_device_view(curr_col);
1933 hash = detail::accumulate(
1934 list_sizes, list_sizes + list_col.size(), hash, [](
auto hash,
auto size) {
1935 return cudf::hashing::detail::hash_combine(hash, hash_fn<size_type>{}(size));
1937 curr_col = list_col.get_sliced_child();
1940 for (
int i = 0; i < curr_col.size(); ++i) {
1941 hash = cudf::hashing::detail::hash_combine(
1943 type_dispatcher<dispatch_void_if_nested>(curr_col.type(), _element_hasher, curr_col, i));
1948 element_hasher<hash_fn, Nullate>
const _element_hasher;
1949 Nullate
const _check_nulls;
1953 table_device_view t,
1954 uint32_t seed = DEFAULT_HASH_SEED) noexcept
1955 : _check_nulls{check_nulls}, _table{t}, _seed(seed)
1959 Nullate
const _check_nulls;
1960 table_device_view
const _table;
1961 uint32_t
const _seed;
1967 using preprocessed_table = row::equality::preprocessed_table;
1996 row_hasher(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
2010 template <
template <
typename>
class hash_function = cudf::hashing::detail::default_hash,
2011 template <
template <
typename>
class,
typename>
2015 uint32_t seed = DEFAULT_HASH_SEED)
const
2017 return DeviceRowHasher<hash_function, Nullate>(
nullate, *d_t, seed);
2021 std::shared_ptr<preprocessed_table> d_t;
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
T element(size_type element_index) const noexcept
Returns reference to element at the specified index.
CUDF_HOST_DEVICE column_device_view slice(size_type offset, size_type size) const noexcept
Get a new column_device_view which is a slice of this column.
CUDF_HOST_DEVICE size_type num_child_columns() const noexcept
Returns the number of child columns.
const_iterator< T > begin() const
Return an iterator to the first element of the column.
const_iterator< T > end() const
Returns an iterator to the element following the last element of the column.
A container of nullable device data as a column of elements.
constexpr type_id id() const noexcept
Returns the type identifier.
CUDF_HOST_DEVICE data_type type() const noexcept
Returns the element type.
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
CUDF_HOST_DEVICE bool nullable() const noexcept
Indicates whether the column can contain null elements, i.e., if it has an allocated bitmask.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child() const
Fetches the child column of the underlying list column with offset and size applied.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child(size_type idx) const
Fetches the child column of the underlying struct column.
Computes the equality comparison between 2 rows.
constexpr bool operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table is equal to the row at rhs_index in the rhs tabl...
Comparator for performing equality comparisons between two rows of the same table.
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing equality comparisons between two rows of the same table.
self_comparator(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows of the same table.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Get the comparison operator to use on the device.
An owning object that can be used to equality compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing equality comparisons between two rows from two tables.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows from two tables.
Computes the hash value of a row in the given table.
auto operator()(size_type row_index) const noexcept
Return the hash value of a row in the given table.
Computes the hash value of an element in the given column.
uint32_t _seed
The seed to use for hashing.
element_hasher(Nullate nulls, uint32_t seed=DEFAULT_HASH_SEED, hash_value_type null_hash=std::numeric_limits< hash_value_type >::max()) noexcept
Constructs an element_hasher object.
Nullate _check_nulls
Whether to check for nulls.
hash_value_type _null_hash
Hash value to use for null elements.
Computes the hash value of a row in the given table.
DeviceRowHasher< hash_function, Nullate > device_hasher(Nullate nullate={}, uint32_t seed=DEFAULT_HASH_SEED) const
Get the hash operator to use on the device.
row_hasher(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for hashing the rows of a table.
row_hasher(std::shared_ptr< preprocessed_table > t)
Construct an owning object for hashing the rows of a table from an existing preprocessed_table.
Performs a relational comparison between two elements in two columns.
cuda::std::pair< weak_ordering, int > operator()(size_type lhs_element_index, size_type rhs_element_index)
Compares two list-type columns.
cuda::std::pair< weak_ordering, int > operator()(size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
Performs a relational comparison between the specified elements.
cuda::std::pair< weak_ordering, int > operator()(size_type const, size_type const) const noexcept
Throws run-time error when columns types cannot be compared or if this class is instantiated with has...
element_comparator(Nullate check_nulls, column_device_view lhs, column_device_view rhs, null_order null_precedence=null_order::BEFORE, int depth=0, PhysicalElementComparator comparator={}, optional_dremel_view l_dremel_device_view={}, optional_dremel_view r_dremel_device_view={})
Construct type-dispatched function object for performing a relational comparison between two elements...
Computes the lexicographic comparison between 2 rows.
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, std::optional< device_span< order const >> column_order=std::nullopt, std::optional< device_span< null_order const >> null_precedence=std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables....
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, device_span< detail::dremel_device_view const > l_dremel_device_views, device_span< detail::dremel_device_view const > r_dremel_device_views, std::optional< device_span< int const >> depth=std::nullopt, std::optional< device_span< order const >> column_order=std::nullopt, std::optional< device_span< null_order const >> null_precedence=std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables.
constexpr weak_ordering operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less,...
An owning object that can be used to lexicographically compare two rows of the same table.
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
self_comparator(table_view const &t, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between two rows of the same tab...
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
An owning object that can be used to lexicographically compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between rows of two different ta...
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
Table device view that is usable in device memory.
A set of cudf::column_view's of the same size.
A set of cudf::column's of the same size.
Column device view class definitions.
uint32_t hash_value_type
Type of hash value.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
null_order
Indicates how null values compare against all other values.
null_equality
Enum to consider two nulls as equal or unequal.
int32_t size_type
Row index type for columns and tables.
#define CUDF_ENABLE_IF(...)
Convenience macro for SFINAE as an unnamed template parameter.
weak_ordering
Result type of the element_relational_comparator function object.
@ EQUIVALENT
Indicates a is ordered neither before nor after b
auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Compare the nulls according to null order.
CUDF_HOST_DEVICE auto make_list_size_iterator(detail::lists_column_device_view const &c)
Makes an iterator that returns size of the list by row index.
Column APIs for sort and rank.
Device version of C++20 std::span with reduced feature set.
A map from cudf::type_id to cudf type that excludes LIST and STRUCT types.
std::conditional_t< t==type_id::STRUCT or t==type_id::LIST, void, id_to_type< t > > type
The type to dispatch to if the type is nested.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparison of non-floating point values.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparisons.
Preprocessed table for use with row equality comparison or row hashing.
static std::shared_ptr< preprocessed_table > create(table_view const &table, rmm::cuda_stream_view stream)
Factory to construct preprocessed_table for use with row equality comparison or row hashing.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS m...
less_comparator(Comparator const &comparator)
Constructs a less_comparator.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS o...
less_equivalent_comparator(Comparator const &comparator)
Constructs a less_equivalent_comparator.
Computes a weak ordering of two values with special sorting behavior.
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparisons.
Preprocessed table for use with lexicographical comparison.
std::invoke_result_t< decltype(table_device_view::create), table_view, rmm::cuda_stream_view > table_device_view_owner
Type of table device view owner for the preprocessed table.
static std::pair< std::shared_ptr< preprocessed_table >, std::shared_ptr< preprocessed_table > > create(table_view const &lhs, table_view const &rhs, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess tables for use with lexicographical comparison.
static std::shared_ptr< preprocessed_table > create(table_view const &table, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess table for use with lexicographical comparison.
Relational comparator functor that compares physical values rather than logical elements like lists,...
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparison of non-floating point values.
A counting iterator that uses strongly typed indices bound to tables.
thrust::iterator_adaptor< strong_index_iterator< Index >, Index > super_t
The base class.
constexpr strong_index_iterator(Underlying n)
Constructs a strong index iterator.
C++20 std::span with reduced feature set.
Indicates the presence of nulls at compile-time or runtime.
Table device view class definitions.
bool has_nested_columns(table_view const &table)
Determine if any nested columns exist in a given table.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.