20 #include <cudf/detail/iterator.cuh>
21 #include <cudf/detail/utilities/algorithm.cuh>
22 #include <cudf/detail/utilities/assert.cuh>
23 #include <cudf/hashing/detail/default_hash.cuh>
24 #include <cudf/hashing/detail/hashing.hpp>
25 #include <cudf/lists/detail/dremel.hpp>
26 #include <cudf/lists/list_device_view.cuh>
27 #include <cudf/lists/lists_column_device_view.cuh>
29 #include <cudf/structs/structs_column_device_view.cuh>
30 #include <cudf/table/row_operators.cuh>
36 #include <cuda/std/functional>
37 #include <cuda/std/limits>
38 #include <cuda/std/optional>
39 #include <cuda/std/tuple>
40 #include <cuda/std/utility>
41 #include <thrust/detail/use_default.h>
42 #include <thrust/equal.h>
43 #include <thrust/execution_policy.h>
44 #include <thrust/functional.h>
45 #include <thrust/iterator/counting_iterator.h>
46 #include <thrust/iterator/iterator_adaptor.h>
47 #include <thrust/iterator/iterator_categories.h>
48 #include <thrust/iterator/iterator_facade.h>
49 #include <thrust/iterator/transform_iterator.h>
50 #include <thrust/logical.h>
51 #include <thrust/swap.h>
52 #include <thrust/transform_reduce.h>
55 #include <type_traits>
57 namespace CUDF_EXPORT
cudf {
59 namespace row::primitive {
60 class row_equality_comparator;
62 template <
template <
typename>
class Hash>
66 namespace experimental {
83 template <cudf::type_
id t>
86 using type = std::conditional_t<t == type_id::STRUCT or t == type_id::LIST, void, id_to_type<t>>;
107 template <
typename Index,
typename Underlying = std::underlying_type_t<Index>>
111 thrust::random_access_traversal_tag,
115 thrust::iterator_adaptor<strong_index_iterator<Index>, Index>;
124 friend class thrust::iterator_core_access;
127 __device__ constexpr
void increment() { ++begin; }
128 __device__ constexpr
void decrement() { --begin; }
130 __device__ constexpr
void advance(Underlying n) { begin += n; }
132 __device__ constexpr
bool equal(strong_index_iterator<Index>
const& other)
const noexcept
134 return begin == other.begin;
137 __device__ constexpr Index dereference() const noexcept {
return static_cast<Index
>(begin); }
139 __device__ constexpr Underlying distance_to(
140 strong_index_iterator<Index>
const& other)
const noexcept
142 return other.begin - begin;
151 using lhs_iterator = strong_index_iterator<lhs_index_type>;
156 using rhs_iterator = strong_index_iterator<rhs_index_type>;
158 namespace lexicographic {
175 template <
typename Element>
178 return detail::compare_elements(lhs, rhs);
195 template <
typename Element, CUDF_ENABLE_IF(not std::is_
floating_po
int_v<Element>)>
198 return detail::compare_elements(lhs, rhs);
208 template <
typename Element, CUDF_ENABLE_IF(std::is_
floating_po
int_v<Element>)>
212 return isnan(rhs) ? weak_ordering::EQUIVALENT : weak_ordering::GREATER;
213 }
else if (isnan(rhs)) {
214 return weak_ordering::LESS;
217 return detail::compare_elements(lhs, rhs);
221 using optional_dremel_view = cuda::std::optional<detail::dremel_device_view const>;
270 template <
bool has_nested_columns,
272 typename PhysicalElementComparator = sorting_physical_element_comparator>
306 PhysicalElementComparator comparator = {}) noexcept
309 _l_dremel(l_dremel_device_views),
310 _r_dremel(r_dremel_device_views),
311 _check_nulls{check_nulls},
313 _column_order{column_order},
314 _null_precedence{null_precedence},
315 _comparator{comparator}
337 template <
bool nested_disable = not has_nested_columns, CUDF_ENABLE_IF(nested_disable)>
344 PhysicalElementComparator comparator = {}) noexcept
349 _check_nulls{check_nulls},
351 _column_order{column_order},
352 _null_precedence{null_precedence},
353 _comparator{comparator}
381 null_order null_precedence = null_order::BEFORE,
383 PhysicalElementComparator comparator = {},
384 optional_dremel_view l_dremel_device_view = {},
385 optional_dremel_view r_dremel_device_view = {})
388 _check_nulls{check_nulls},
389 _null_precedence{null_precedence},
391 _l_dremel_device_view{l_dremel_device_view},
392 _r_dremel_device_view{r_dremel_device_view},
393 _comparator{comparator}
405 template <
typename Element,
406 CUDF_ENABLE_IF(cudf::is_relationally_comparable<Element, Element>())>
411 bool const lhs_is_null{_lhs.is_null(lhs_element_index)};
412 bool const rhs_is_null{_rhs.is_null(rhs_element_index)};
414 if (lhs_is_null or rhs_is_null) {
415 return cuda::std::pair(
null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth);
419 return cuda::std::pair(_comparator(_lhs.element<Element>(lhs_element_index),
420 _rhs.element<Element>(rhs_element_index)),
421 cuda::std::numeric_limits<int>::max());
431 template <
typename Element,
432 CUDF_ENABLE_IF(not cudf::is_relationally_comparable<Element, Element>() and
433 (not has_nested_columns or not cudf::is_nested<Element>()))>
437 CUDF_UNREACHABLE(
"Attempted to compare elements of uncomparable types.");
448 template <
typename Element,
449 CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::struct_view>)>
456 while (lcol.
type().
id() == type_id::STRUCT) {
457 bool const lhs_is_null{lcol.
is_null(lhs_element_index)};
458 bool const rhs_is_null{rcol.
is_null(rhs_element_index)};
460 if (lhs_is_null or rhs_is_null) {
462 return cuda::std::pair(state, depth);
466 return cuda::std::pair(weak_ordering::EQUIVALENT, cuda::std::numeric_limits<int>::max());
475 return cudf::type_dispatcher<dispatch_void_if_nested>(
490 template <
typename Element,
491 CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::list_view>)>
496 auto const is_l_row_null = _lhs.is_null(lhs_element_index);
497 auto const is_r_row_null = _rhs.is_null(rhs_element_index);
498 if (is_l_row_null || is_r_row_null) {
499 return cuda::std::pair(
null_compare(is_l_row_null, is_r_row_null, _null_precedence),
504 auto const l_max_def_level = _l_dremel_device_view->max_def_level;
505 auto const r_max_def_level = _r_dremel_device_view->max_def_level;
506 auto const l_def_levels = _l_dremel_device_view->def_levels;
507 auto const r_def_levels = _r_dremel_device_view->def_levels;
508 auto const l_rep_levels = _l_dremel_device_view->rep_levels;
509 auto const r_rep_levels = _r_dremel_device_view->rep_levels;
516 while (lcol.
type().
id() == type_id::LIST) {
524 auto const l_offsets = _l_dremel_device_view->offsets;
525 auto const r_offsets = _r_dremel_device_view->offsets;
526 auto l_start = l_offsets[lhs_element_index];
527 auto l_end = l_offsets[lhs_element_index + 1];
528 auto r_start = r_offsets[rhs_element_index];
529 auto r_end = r_offsets[rhs_element_index + 1];
539 for (
int l_dremel_index = l_start, r_dremel_index = r_start, element_index = 0;
540 l_dremel_index < l_end and r_dremel_index < r_end;
541 ++l_dremel_index, ++r_dremel_index) {
542 auto const l_rep_level = l_rep_levels[l_dremel_index];
543 auto const r_rep_level = r_rep_levels[r_dremel_index];
546 if (l_rep_level != r_rep_level) {
548 return l_rep_level < r_rep_level ? cuda::std::pair(weak_ordering::LESS, _depth)
549 : cuda::std::pair(weak_ordering::GREATER, _depth);
553 auto const l_def_level = l_def_levels[l_dremel_index];
554 auto const r_def_level = r_def_levels[r_dremel_index];
557 if (l_def_level < l_max_def_level || r_def_level < r_max_def_level) {
564 if ((lcol.
nullable() and l_def_levels[l_dremel_index] == l_max_def_level - 1) or
565 (rcol.
nullable() and r_def_levels[r_dremel_index] == r_max_def_level - 1)) {
568 if (l_def_level == r_def_level) {
continue; }
571 return l_def_level < r_def_level ? cuda::std::pair(weak_ordering::LESS, _depth)
572 : cuda::std::pair(weak_ordering::GREATER, _depth);
577 int last_null_depth = _depth;
578 cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher<dispatch_void_if_nested>(
579 lcol.
type(), comparator, element_index, element_index);
580 if (state != weak_ordering::EQUIVALENT) {
return cuda::std::pair(state, _depth); }
591 return cuda::std::pair(detail::compare_elements(l_end - l_start, r_end - r_start), _depth);
597 Nullate
const _check_nulls;
600 optional_dremel_view _l_dremel_device_view;
601 optional_dremel_view _r_dremel_device_view;
602 PhysicalElementComparator
const _comparator;
616 size_type const rhs_index)
const noexcept
618 int last_null_depth = cuda::std::numeric_limits<int>::max();
620 for (
size_type i = 0; i < _lhs.num_columns(); ++i) {
621 if (_lhs.column(i).type().id() == type_id::LIST) { ++list_column_index; }
623 int const depth = _depth.has_value() ? (*_depth)[i] : 0;
624 if (depth > last_null_depth) {
continue; }
626 bool const ascending =
627 _column_order.has_value() ? (*_column_order)[i] == order::ASCENDING :
true;
630 _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE;
635 auto const [l_dremel_i, r_dremel_i] =
636 _lhs.column(i).type().id() == type_id::LIST
637 ? cuda::std::make_tuple(optional_dremel_view(_l_dremel[list_column_index]),
638 optional_dremel_view(_r_dremel[list_column_index]))
639 : cuda::std::make_tuple(optional_dremel_view{}, optional_dremel_view{});
651 cuda::std::tie(state, last_null_depth) =
654 if (state == weak_ordering::EQUIVALENT) {
continue; }
658 : (state == weak_ordering::GREATER ? weak_ordering::LESS : weak_ordering::GREATER);
660 return weak_ordering::EQUIVALENT;
668 Nullate
const _check_nulls;
669 cuda::std::optional<device_span<int const>>
const _depth;
670 cuda::std::optional<device_span<order const>>
const _column_order;
671 cuda::std::optional<device_span<null_order const>>
const _null_precedence;
672 PhysicalElementComparator
const _comparator;
686 struct weak_ordering_comparator_impl {
687 static_assert(not((weak_ordering::EQUIVALENT == values) && ...),
688 "weak_ordering_comparator should not be used for pure equality comparisons. The "
689 "`row_equality_comparator` should be used instead");
691 template <
typename LhsType,
typename RhsType>
692 __device__ constexpr
bool operator()(LhsType
const lhs_index,
693 RhsType
const rhs_index)
const noexcept
695 weak_ordering const result = comparator(lhs_index, rhs_index);
696 return ((result == values) || ...);
698 Comparator
const comparator;
707 template <
typename Comparator>
708 struct less_comparator : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS> {
715 : weak_ordering_comparator_impl<Comparator,
weak_ordering::LESS>{comparator}
727 template <
typename Comparator>
729 : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS, weak_ordering::EQUIVALENT> {
799 static std::pair<std::shared_ptr<preprocessed_table>, std::shared_ptr<preprocessed_table>>
create(
829 static std::shared_ptr<preprocessed_table> create(
831 std::vector<int>&& verticalized_col_depths,
832 std::vector<std::unique_ptr<column>>&& transformed_columns,
835 bool has_ranked_children,
868 std::vector<detail::dremel_data>&& dremel_data,
870 std::vector<std::unique_ptr<column>>&& transformed_columns,
871 bool has_ranked_children);
877 std::vector<std::unique_ptr<column>>&& transformed_columns,
878 bool has_ranked_children);
893 [[nodiscard]] cuda::std::optional<device_span<order const>> column_order()
const
895 return _column_order.size() ? cuda::std::optional<device_span<order const>>(_column_order)
896 : cuda::std::nullopt;
906 [[nodiscard]] cuda::std::optional<device_span<null_order const>> null_precedence()
const
908 return _null_precedence.size()
909 ? cuda::std::optional<device_span<null_order const>>(_null_precedence)
910 : cuda::std::nullopt;
921 [[nodiscard]] cuda::std::optional<device_span<int const>> depths()
const
923 return _depths.size() ? cuda::std::optional<device_span<int const>>(_depths)
924 : cuda::std::nullopt;
929 if (_dremel_device_views.has_value()) {
936 template <
typename PhysicalElementComparator>
937 void check_physical_element_comparator()
939 if constexpr (!std::is_same_v<PhysicalElementComparator, sorting_physical_element_comparator>) {
941 "The input table has nested type children and they were transformed using a "
942 "different type of physical element comparator.");
947 table_device_view_owner
const _t;
953 cuda::std::optional<std::vector<detail::dremel_data>> _dremel_data;
954 cuda::std::optional<rmm::device_uvector<detail::dremel_device_view>> _dremel_device_views;
958 std::vector<std::unique_ptr<column>> _transformed_columns;
962 bool const _has_ranked_children;
998 : d_t{preprocessed_table::create(t, column_order, null_precedence, stream)}
1041 template <
bool has_nested_columns,
1044 auto less(Nullate
nullate = {}, PhysicalElementComparator comparator = {})
const
1046 d_t->check_physical_element_comparator<PhysicalElementComparator>();
1048 return less_comparator{
1049 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1053 d_t->dremel_device_views(),
1054 d_t->dremel_device_views(),
1056 d_t->column_order(),
1057 d_t->null_precedence(),
1062 template <
bool has_nested_columns,
1064 typename PhysicalElementComparator = sorting_physical_element_comparator>
1067 d_t->check_physical_element_comparator<PhysicalElementComparator>();
1069 return less_equivalent_comparator{
1070 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1074 d_t->dremel_device_views(),
1075 d_t->dremel_device_views(),
1077 d_t->column_order(),
1078 d_t->null_precedence(),
1083 std::shared_ptr<preprocessed_table> d_t;
1087 template <
typename Comparator>
1088 struct strong_index_comparator_adapter {
1089 strong_index_comparator_adapter(Comparator
const& comparator) : comparator{comparator} {}
1091 __device__ constexpr
weak_ordering operator()(lhs_index_type
const lhs_index,
1092 rhs_index_type
const rhs_index)
const noexcept
1098 __device__ constexpr
weak_ordering operator()(rhs_index_type
const rhs_index,
1099 lhs_index_type
const lhs_index)
const noexcept
1101 auto const left_right_ordering =
1105 if (left_right_ordering == weak_ordering::LESS) {
1106 return weak_ordering::GREATER;
1107 }
else if (left_right_ordering == weak_ordering::GREATER) {
1108 return weak_ordering::LESS;
1110 return weak_ordering::EQUIVALENT;
1113 Comparator
const comparator;
1172 std::shared_ptr<preprocessed_table> right)
1173 : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1213 template <
bool has_nested_columns,
1216 auto less(Nullate
nullate = {}, PhysicalElementComparator comparator = {})
const
1218 d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1219 d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1221 return less_comparator{strong_index_comparator_adapter{
1222 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1226 d_left_table->dremel_device_views(),
1227 d_right_table->dremel_device_views(),
1228 d_left_table->depths(),
1229 d_left_table->column_order(),
1230 d_left_table->null_precedence(),
1237 typename PhysicalElementComparator = sorting_physical_element_comparator>
1240 d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1241 d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1243 return less_equivalent_comparator{strong_index_comparator_adapter{
1244 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1248 d_left_table->dremel_device_views(),
1249 d_right_table->dremel_device_views(),
1250 d_left_table->depths(),
1251 d_left_table->column_order(),
1252 d_left_table->null_precedence(),
1257 std::shared_ptr<preprocessed_table> d_left_table;
1258 std::shared_ptr<preprocessed_table> d_right_table;
1267 namespace equality {
1284 template <
typename Element>
1285 __device__ constexpr
bool operator()(Element
const lhs, Element
const rhs)
const noexcept
1303 template <
typename Element, CUDF_ENABLE_IF(not std::is_
floating_po
int_v<Element>)>
1304 __device__ constexpr
bool operator()(Element
const lhs, Element
const rhs)
const noexcept
1318 template <
typename Element, CUDF_ENABLE_IF(std::is_
floating_po
int_v<Element>)>
1319 __device__ constexpr
bool operator()(Element
const lhs, Element
const rhs)
const noexcept
1321 return isnan(lhs) and isnan(rhs) ? true : lhs == rhs;
1346 template <
bool has_nested_columns,
1348 typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1363 size_type const rhs_index)
const noexcept
1368 element_comparator{check_nulls, l, r, nulls_are_equal, comparator},
1373 return thrust::equal(thrust::seq, lhs.
begin(), lhs.
end(), rhs.
begin(), equal_elements);
1391 PhysicalEqualityComparator comparator = {}) noexcept
1394 check_nulls{check_nulls},
1395 nulls_are_equal{nulls_are_equal},
1396 comparator{comparator}
1403 class element_comparator {
1417 __device__ element_comparator(Nullate check_nulls,
1418 column_device_view lhs,
1419 column_device_view rhs,
1421 PhysicalEqualityComparator comparator = {}) noexcept
1424 check_nulls{check_nulls},
1425 nulls_are_equal{nulls_are_equal},
1426 comparator{comparator}
1438 template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1439 __device__
bool operator()(
size_type const lhs_element_index,
1440 size_type const rhs_element_index)
const noexcept
1443 bool const lhs_is_null{lhs.is_null(lhs_element_index)};
1444 bool const rhs_is_null{rhs.is_null(rhs_element_index)};
1445 if (lhs_is_null and rhs_is_null) {
1446 return nulls_are_equal == null_equality::EQUAL;
1447 }
else if (lhs_is_null != rhs_is_null) {
1452 return comparator(lhs.element<Element>(lhs_element_index),
1453 rhs.element<Element>(rhs_element_index));
1456 template <
typename Element,
1457 CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>() and
1458 (not has_nested_columns or not cudf::is_nested<Element>())),
1460 __device__
bool operator()(Args...)
1462 CUDF_UNREACHABLE(
"Attempted to compare elements of uncomparable types.");
1465 template <typename Element, CUDF_ENABLE_IF(has_nested_columns and cudf::is_nested<Element>())>
1466 __device__
bool operator()(
size_type const lhs_element_index,
1467 size_type const rhs_element_index)
const noexcept
1469 column_device_view lcol = lhs.slice(lhs_element_index, 1);
1470 column_device_view rcol = rhs.slice(rhs_element_index, 1);
1471 while (lcol.type().id() == type_id::STRUCT || lcol.type().id() == type_id::LIST) {
1473 auto lvalid = detail::make_validity_iterator<true>(lcol);
1474 auto rvalid = detail::make_validity_iterator<true>(rcol);
1475 if (nulls_are_equal == null_equality::UNEQUAL) {
1477 thrust::seq, lvalid, lvalid + lcol.size(), cuda::std::logical_not<bool>()) or
1479 thrust::seq, rvalid, rvalid + rcol.size(), cuda::std::logical_not<bool>())) {
1483 if (not thrust::equal(thrust::seq, lvalid, lvalid + lcol.size(), rvalid)) {
1488 if (lcol.type().id() == type_id::STRUCT) {
1489 if (lcol.num_child_columns() == 0) {
return true; }
1491 lcol = detail::structs_column_device_view(lcol).get_sliced_child(0);
1492 rcol = detail::structs_column_device_view(rcol).get_sliced_child(0);
1493 }
else if (lcol.type().id() == type_id::LIST) {
1494 auto l_list_col = detail::lists_column_device_view(lcol);
1495 auto r_list_col = detail::lists_column_device_view(rcol);
1499 if (not thrust::equal(thrust::seq, lsizes, lsizes + lcol.size(), rsizes)) {
1503 lcol = l_list_col.get_sliced_child();
1504 rcol = r_list_col.get_sliced_child();
1505 if (lcol.size() != rcol.size()) {
return false; }
1509 auto comp = column_comparator{
1510 element_comparator{check_nulls, lcol, rcol, nulls_are_equal, comparator}, lcol.size()};
1511 return type_dispatcher<dispatch_void_if_nested>(lcol.type(), comp);
1522 struct column_comparator {
1523 element_comparator
const comp;
1531 template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1532 __device__
bool operator()()
const noexcept
1534 return thrust::all_of(thrust::seq,
1535 thrust::make_counting_iterator(0),
1536 thrust::make_counting_iterator(0) + size,
1537 [
this](
auto i) {
return comp.template operator()<Element>(i, i); });
1540 template <
typename Element,
1541 CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>()),
1543 __device__
bool operator()(Args...) const noexcept
1545 CUDF_UNREACHABLE(
"Attempted to compare elements of uncomparable types.");
1549 column_device_view
const lhs;
1550 column_device_view
const rhs;
1551 Nullate
const check_nulls;
1553 PhysicalEqualityComparator
const comparator;
1556 table_device_view
const lhs;
1557 table_device_view
const rhs;
1558 Nullate
const check_nulls;
1560 PhysicalEqualityComparator
const comparator;
1588 friend class ::cudf::row::primitive::row_equality_comparator;
1590 template <
template <
typename>
class Hash>
1591 friend class ::cudf::row::primitive::row_hasher;
1593 using table_device_view_owner =
1597 std::vector<rmm::device_buffer>&& null_buffers,
1598 std::vector<std::unique_ptr<column>>&& tmp_columns)
1599 : _t(std::move(
table)),
1600 _null_buffers(std::move(null_buffers)),
1601 _tmp_columns(std::move(tmp_columns))
1612 table_device_view_owner _t;
1613 std::vector<rmm::device_buffer> _null_buffers;
1614 std::vector<std::unique_ptr<column>> _tmp_columns;
1672 template <
bool has_nested_columns,
1677 PhysicalEqualityComparator comparator = {})
const noexcept
1679 return device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>{
1680 nullate, *d_t, *d_t, nulls_are_equal, comparator};
1684 std::shared_ptr<preprocessed_table> d_t;
1688 template <
typename Comparator>
1689 struct strong_index_comparator_adapter {
1690 strong_index_comparator_adapter(Comparator
const& comparator) : comparator{comparator} {}
1692 __device__ constexpr
bool operator()(lhs_index_type
const lhs_index,
1693 rhs_index_type
const rhs_index)
const noexcept
1699 __device__ constexpr
bool operator()(rhs_index_type
const rhs_index,
1700 lhs_index_type
const lhs_index)
const noexcept
1702 return this->operator()(lhs_index, rhs_index);
1705 Comparator
const comparator;
1751 std::shared_ptr<preprocessed_table> right)
1752 : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1786 template <
bool has_nested_columns,
1791 PhysicalEqualityComparator comparator = {})
const noexcept
1793 return strong_index_comparator_adapter{
1794 device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>(
1795 nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)};
1799 std::shared_ptr<preprocessed_table> d_left_table;
1800 std::shared_ptr<preprocessed_table> d_right_table;
1813 template <
template <
typename>
class hash_function,
typename Nullate>
1825 uint32_t seed = DEFAULT_HASH_SEED,
1826 hash_value_type null_hash = cuda::std::numeric_limits<hash_value_type>::max()) noexcept
1827 : _check_nulls(nulls), _seed(seed), _null_hash(null_hash)
1839 template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
1843 if (_check_nulls && col.
is_null(row_index)) {
return _null_hash; }
1844 return hash_function<T>{_seed}(col.
element<T>(row_index));
1855 template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
1859 CUDF_UNREACHABLE(
"Unsupported type in hash.");
1873 template <
template <
typename>
class hash_function,
typename Nullate>
1887 thrust::make_transform_iterator(_table.begin(), [row_index,
this](
auto const&
column) {
1888 return cudf::type_dispatcher<dispatch_storage_type>(
1890 element_hasher_adapter<hash_function>{_check_nulls, _seed},
1896 return detail::accumulate(it, it + _table.num_columns(), _seed, [](
auto hash,
auto h) {
1897 return cudf::hashing::detail::hash_combine(hash, h);
1909 template <
template <
typename>
class hash_fn>
1910 class element_hasher_adapter {
1911 static constexpr
hash_value_type NULL_HASH = cuda::std::numeric_limits<hash_value_type>::max();
1915 __device__ element_hasher_adapter(Nullate check_nulls, uint32_t seed) noexcept
1916 : _element_hasher(check_nulls, seed), _check_nulls(check_nulls)
1920 template <typename T, CUDF_ENABLE_IF(not cudf::is_nested<T>())>
1924 return _element_hasher.template operator()<T>(col, row_index);
1927 template <typename T, CUDF_ENABLE_IF(cudf::is_nested<T>())>
1932 column_device_view curr_col = col.
slice(row_index, 1);
1933 while (curr_col.type().id() == type_id::STRUCT || curr_col.type().id() == type_id::LIST) {
1935 auto validity_it = detail::make_validity_iterator<true>(curr_col);
1936 hash = detail::accumulate(
1937 validity_it, validity_it + curr_col.size(), hash, [](
auto hash,
auto is_valid) {
1938 return cudf::hashing::detail::hash_combine(hash,
1939 is_valid ? NON_NULL_HASH : NULL_HASH);
1942 if (curr_col.type().id() == type_id::STRUCT) {
1943 if (curr_col.num_child_columns() == 0) {
return hash; }
1945 curr_col = detail::structs_column_device_view(curr_col).get_sliced_child(0);
1946 }
else if (curr_col.type().id() == type_id::LIST) {
1947 auto list_col = detail::lists_column_device_view(curr_col);
1949 hash = detail::accumulate(
1950 list_sizes, list_sizes + list_col.size(), hash, [](
auto hash,
auto size) {
1951 return cudf::hashing::detail::hash_combine(hash, hash_fn<size_type>{}(size));
1953 curr_col = list_col.get_sliced_child();
1956 for (
int i = 0; i < curr_col.size(); ++i) {
1957 hash = cudf::hashing::detail::hash_combine(
1959 type_dispatcher<dispatch_void_if_nested>(curr_col.type(), _element_hasher, curr_col, i));
1964 element_hasher<hash_fn, Nullate>
const _element_hasher;
1965 Nullate
const _check_nulls;
1969 table_device_view t,
1970 uint32_t seed = DEFAULT_HASH_SEED) noexcept
1971 : _check_nulls{check_nulls}, _table{t}, _seed(seed)
1975 Nullate
const _check_nulls;
1976 table_device_view
const _table;
1977 uint32_t
const _seed;
1983 using preprocessed_table = row::equality::preprocessed_table;
2012 row_hasher(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
2027 template <
typename>
class hash_function = cudf::hashing::detail::default_hash,
2028 template <
template <
typename>
class,
typename>
class DeviceRowHasher =
device_row_hasher,
2031 uint32_t seed = DEFAULT_HASH_SEED)
const
2033 return DeviceRowHasher<hash_function, Nullate>(
nullate, *d_t, seed);
2037 std::shared_ptr<preprocessed_table> d_t;
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
T element(size_type element_index) const noexcept
Returns a copy of the element at the specified index.
CUDF_HOST_DEVICE column_device_view slice(size_type offset, size_type size) const noexcept
Get a new column_device_view which is a slice of this column.
CUDF_HOST_DEVICE size_type num_child_columns() const noexcept
Returns the number of child columns.
const_iterator< T > begin() const
Return an iterator to the first element of the column.
const_iterator< T > end() const
Returns an iterator to the element following the last element of the column.
A container of nullable device data as a column of elements.
constexpr CUDF_HOST_DEVICE type_id id() const noexcept
Returns the type identifier.
CUDF_HOST_DEVICE data_type type() const noexcept
Returns the element type.
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
CUDF_HOST_DEVICE bool nullable() const noexcept
Indicates whether the column can contain null elements, i.e., if it has an allocated bitmask.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child() const
Fetches the child column of the underlying list column with offset and size applied.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child(size_type idx) const
Fetches the child column of the underlying struct column.
Computes the equality comparison between 2 rows.
constexpr bool operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table is equal to the row at rhs_index in the rhs tabl...
Comparator for performing equality comparisons between two rows of the same table.
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing equality comparisons between two rows of the same table.
self_comparator(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows of the same table.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Get the comparison operator to use on the device.
An owning object that can be used to equality compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing equality comparisons between two rows from two tables.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows from two tables.
Computes the hash value of a row in the given table.
auto operator()(size_type row_index) const noexcept
Return the hash value of a row in the given table.
Computes the hash value of an element in the given column.
uint32_t _seed
The seed to use for hashing.
element_hasher(Nullate nulls, uint32_t seed=DEFAULT_HASH_SEED, hash_value_type null_hash=cuda::std::numeric_limits< hash_value_type >::max()) noexcept
Constructs an element_hasher object.
Nullate _check_nulls
Whether to check for nulls.
hash_value_type _null_hash
Hash value to use for null elements.
Computes the hash value of a row in the given table.
DeviceRowHasher< hash_function, Nullate > device_hasher(Nullate nullate={}, uint32_t seed=DEFAULT_HASH_SEED) const
Get the hash operator to use on the device.
row_hasher(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for hashing the rows of a table.
row_hasher(std::shared_ptr< preprocessed_table > t)
Construct an owning object for hashing the rows of a table from an existing preprocessed_table.
Performs a relational comparison between two elements in two columns.
cuda::std::pair< weak_ordering, int > operator()(size_type lhs_element_index, size_type rhs_element_index)
Compares two list-type columns.
cuda::std::pair< weak_ordering, int > operator()(size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
Performs a relational comparison between the specified elements.
cuda::std::pair< weak_ordering, int > operator()(size_type const, size_type const) const noexcept
Throws run-time error when columns types cannot be compared or if this class is instantiated with has...
element_comparator(Nullate check_nulls, column_device_view lhs, column_device_view rhs, null_order null_precedence=null_order::BEFORE, int depth=0, PhysicalElementComparator comparator={}, optional_dremel_view l_dremel_device_view={}, optional_dremel_view r_dremel_device_view={})
Construct type-dispatched function object for performing a relational comparison between two elements...
Computes the lexicographic comparison between 2 rows.
constexpr weak_ordering operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less,...
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, cuda::std::optional< device_span< order const >> column_order=cuda::std::nullopt, cuda::std::optional< device_span< null_order const >> null_precedence=cuda::std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables....
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, device_span< detail::dremel_device_view const > l_dremel_device_views, device_span< detail::dremel_device_view const > r_dremel_device_views, cuda::std::optional< device_span< int const >> depth=cuda::std::nullopt, cuda::std::optional< device_span< order const >> column_order=cuda::std::nullopt, cuda::std::optional< device_span< null_order const >> null_precedence=cuda::std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables.
An owning object that can be used to lexicographically compare two rows of the same table.
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
self_comparator(table_view const &t, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between two rows of the same tab...
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
An owning object that can be used to lexicographically compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between rows of two different ta...
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
Table device view that is usable in device memory.
A set of cudf::column_view's of the same size.
A set of cudf::column's of the same size.
Column device view class definitions.
uint32_t hash_value_type
Type of hash value.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
null_order
Indicates how null values compare against all other values.
null_equality
Enum to consider two nulls as equal or unequal.
int32_t size_type
Row index type for columns and tables.
#define CUDF_ENABLE_IF(...)
Convenience macro for SFINAE as an unnamed template parameter.
weak_ordering
Result type of the element_relational_comparator function object.
@ EQUIVALENT
Indicates a is ordered neither before nor after b
auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Compare the nulls according to null order.
CUDF_HOST_DEVICE auto make_list_size_iterator(detail::lists_column_device_view const &c)
Makes an iterator that returns size of the list by row index.
Column APIs for sort and rank.
Device version of C++20 std::span with reduced feature set.
A map from cudf::type_id to cudf type that excludes LIST and STRUCT types.
std::conditional_t< t==type_id::STRUCT or t==type_id::LIST, void, id_to_type< t > > type
The type to dispatch to if the type is nested.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparison of non-floating point values.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparisons.
Preprocessed table for use with row equality comparison or row hashing.
static std::shared_ptr< preprocessed_table > create(table_view const &table, rmm::cuda_stream_view stream)
Factory to construct preprocessed_table for use with row equality comparison or row hashing.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS m...
less_comparator(Comparator const &comparator)
Constructs a less_comparator.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS o...
less_equivalent_comparator(Comparator const &comparator)
Constructs a less_equivalent_comparator.
Computes a weak ordering of two values with special sorting behavior.
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparisons.
Preprocessed table for use with lexicographical comparison.
std::invoke_result_t< decltype(table_device_view::create), table_view, rmm::cuda_stream_view > table_device_view_owner
Type of table device view owner for the preprocessed table.
static std::pair< std::shared_ptr< preprocessed_table >, std::shared_ptr< preprocessed_table > > create(table_view const &lhs, table_view const &rhs, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess tables for use with lexicographical comparison.
static std::shared_ptr< preprocessed_table > create(table_view const &table, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess table for use with lexicographical comparison.
Relational comparator functor that compares physical values rather than logical elements like lists,...
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparison of non-floating point values.
A counting iterator that uses strongly typed indices bound to tables.
thrust::iterator_adaptor< strong_index_iterator< Index >, Index > super_t
The base class.
constexpr strong_index_iterator(Underlying n)
Constructs a strong index iterator.
C++20 std::span with reduced feature set.
Indicates the presence of nulls at compile-time or runtime.
Table device view class definitions.
bool has_nested_columns(table_view const &table)
Determine if any nested columns exist in a given table.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.