20 #include <cudf/detail/iterator.cuh>
21 #include <cudf/detail/utilities/algorithm.cuh>
22 #include <cudf/detail/utilities/assert.cuh>
23 #include <cudf/hashing/detail/default_hash.cuh>
24 #include <cudf/hashing/detail/hashing.hpp>
25 #include <cudf/lists/detail/dremel.hpp>
26 #include <cudf/lists/list_device_view.cuh>
27 #include <cudf/lists/lists_column_device_view.cuh>
29 #include <cudf/structs/structs_column_device_view.cuh>
30 #include <cudf/table/row_operators.cuh>
36 #include <cuda/std/limits>
37 #include <cuda/std/optional>
38 #include <cuda/std/tuple>
39 #include <cuda/std/utility>
40 #include <thrust/detail/use_default.h>
41 #include <thrust/equal.h>
42 #include <thrust/execution_policy.h>
43 #include <thrust/functional.h>
44 #include <thrust/iterator/counting_iterator.h>
45 #include <thrust/iterator/iterator_adaptor.h>
46 #include <thrust/iterator/iterator_categories.h>
47 #include <thrust/iterator/iterator_facade.h>
48 #include <thrust/iterator/transform_iterator.h>
49 #include <thrust/logical.h>
50 #include <thrust/swap.h>
51 #include <thrust/transform_reduce.h>
54 #include <type_traits>
56 namespace CUDF_EXPORT
cudf {
58 namespace experimental {
75 template <cudf::type_
id t>
78 using type = std::conditional_t<t == type_id::STRUCT or t == type_id::LIST, void, id_to_type<t>>;
99 template <
typename Index,
typename Underlying = std::underlying_type_t<Index>>
103 thrust::random_access_traversal_tag,
107 thrust::iterator_adaptor<strong_index_iterator<Index>, Index>;
116 friend class thrust::iterator_core_access;
119 __device__ constexpr
void increment() { ++begin; }
120 __device__ constexpr
void decrement() { --begin; }
122 __device__ constexpr
void advance(Underlying n) { begin += n; }
124 __device__ constexpr
bool equal(strong_index_iterator<Index>
const& other)
const noexcept
126 return begin == other.begin;
129 __device__ constexpr Index dereference() const noexcept {
return static_cast<Index
>(begin); }
131 __device__ constexpr Underlying distance_to(
132 strong_index_iterator<Index>
const& other)
const noexcept
134 return other.begin - begin;
143 using lhs_iterator = strong_index_iterator<lhs_index_type>;
148 using rhs_iterator = strong_index_iterator<rhs_index_type>;
150 namespace lexicographic {
167 template <
typename Element>
170 return detail::compare_elements(lhs, rhs);
187 template <
typename Element, CUDF_ENABLE_IF(not std::is_
floating_po
int_v<Element>)>
190 return detail::compare_elements(lhs, rhs);
200 template <
typename Element, CUDF_ENABLE_IF(std::is_
floating_po
int_v<Element>)>
204 return isnan(rhs) ? weak_ordering::EQUIVALENT : weak_ordering::GREATER;
205 }
else if (isnan(rhs)) {
206 return weak_ordering::LESS;
209 return detail::compare_elements(lhs, rhs);
213 using optional_dremel_view = cuda::std::optional<detail::dremel_device_view const>;
262 template <
bool has_nested_columns,
264 typename PhysicalElementComparator = sorting_physical_element_comparator>
298 PhysicalElementComparator comparator = {}) noexcept
301 _l_dremel(l_dremel_device_views),
302 _r_dremel(r_dremel_device_views),
303 _check_nulls{check_nulls},
305 _column_order{column_order},
306 _null_precedence{null_precedence},
307 _comparator{comparator}
329 template <
bool nested_disable = not has_nested_columns, CUDF_ENABLE_IF(nested_disable)>
336 PhysicalElementComparator comparator = {}) noexcept
341 _check_nulls{check_nulls},
343 _column_order{column_order},
344 _null_precedence{null_precedence},
345 _comparator{comparator}
373 null_order null_precedence = null_order::BEFORE,
375 PhysicalElementComparator comparator = {},
376 optional_dremel_view l_dremel_device_view = {},
377 optional_dremel_view r_dremel_device_view = {})
380 _check_nulls{check_nulls},
381 _null_precedence{null_precedence},
383 _l_dremel_device_view{l_dremel_device_view},
384 _r_dremel_device_view{r_dremel_device_view},
385 _comparator{comparator}
397 template <
typename Element,
398 CUDF_ENABLE_IF(cudf::is_relationally_comparable<Element, Element>())>
403 bool const lhs_is_null{_lhs.is_null(lhs_element_index)};
404 bool const rhs_is_null{_rhs.is_null(rhs_element_index)};
406 if (lhs_is_null or rhs_is_null) {
407 return cuda::std::pair(
null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth);
411 return cuda::std::pair(_comparator(_lhs.element<Element>(lhs_element_index),
412 _rhs.element<Element>(rhs_element_index)),
413 cuda::std::numeric_limits<int>::max());
423 template <
typename Element,
424 CUDF_ENABLE_IF(not cudf::is_relationally_comparable<Element, Element>() and
425 (not has_nested_columns or not cudf::is_nested<Element>()))>
429 CUDF_UNREACHABLE(
"Attempted to compare elements of uncomparable types.");
440 template <
typename Element,
441 CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::struct_view>)>
448 while (lcol.
type().
id() == type_id::STRUCT) {
449 bool const lhs_is_null{lcol.
is_null(lhs_element_index)};
450 bool const rhs_is_null{rcol.
is_null(rhs_element_index)};
452 if (lhs_is_null or rhs_is_null) {
454 return cuda::std::pair(state, depth);
458 return cuda::std::pair(weak_ordering::EQUIVALENT, cuda::std::numeric_limits<int>::max());
467 return cudf::type_dispatcher<dispatch_void_if_nested>(
482 template <
typename Element,
483 CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::list_view>)>
488 auto const is_l_row_null = _lhs.is_null(lhs_element_index);
489 auto const is_r_row_null = _rhs.is_null(rhs_element_index);
490 if (is_l_row_null || is_r_row_null) {
491 return cuda::std::pair(
null_compare(is_l_row_null, is_r_row_null, _null_precedence),
496 auto const l_max_def_level = _l_dremel_device_view->max_def_level;
497 auto const r_max_def_level = _r_dremel_device_view->max_def_level;
498 auto const l_def_levels = _l_dremel_device_view->def_levels;
499 auto const r_def_levels = _r_dremel_device_view->def_levels;
500 auto const l_rep_levels = _l_dremel_device_view->rep_levels;
501 auto const r_rep_levels = _r_dremel_device_view->rep_levels;
508 while (lcol.
type().
id() == type_id::LIST) {
516 auto const l_offsets = _l_dremel_device_view->offsets;
517 auto const r_offsets = _r_dremel_device_view->offsets;
518 auto l_start = l_offsets[lhs_element_index];
519 auto l_end = l_offsets[lhs_element_index + 1];
520 auto r_start = r_offsets[rhs_element_index];
521 auto r_end = r_offsets[rhs_element_index + 1];
531 for (
int l_dremel_index = l_start, r_dremel_index = r_start, element_index = 0;
532 l_dremel_index < l_end and r_dremel_index < r_end;
533 ++l_dremel_index, ++r_dremel_index) {
534 auto const l_rep_level = l_rep_levels[l_dremel_index];
535 auto const r_rep_level = r_rep_levels[r_dremel_index];
538 if (l_rep_level != r_rep_level) {
540 return l_rep_level < r_rep_level ? cuda::std::pair(weak_ordering::LESS, _depth)
541 : cuda::std::pair(weak_ordering::GREATER, _depth);
545 auto const l_def_level = l_def_levels[l_dremel_index];
546 auto const r_def_level = r_def_levels[r_dremel_index];
549 if (l_def_level < l_max_def_level || r_def_level < r_max_def_level) {
556 if ((lcol.
nullable() and l_def_levels[l_dremel_index] == l_max_def_level - 1) or
557 (rcol.
nullable() and r_def_levels[r_dremel_index] == r_max_def_level - 1)) {
560 if (l_def_level == r_def_level) {
continue; }
563 return l_def_level < r_def_level ? cuda::std::pair(weak_ordering::LESS, _depth)
564 : cuda::std::pair(weak_ordering::GREATER, _depth);
569 int last_null_depth = _depth;
570 cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher<dispatch_void_if_nested>(
571 lcol.
type(), comparator, element_index, element_index);
572 if (state != weak_ordering::EQUIVALENT) {
return cuda::std::pair(state, _depth); }
583 return cuda::std::pair(detail::compare_elements(l_end - l_start, r_end - r_start), _depth);
589 Nullate
const _check_nulls;
592 optional_dremel_view _l_dremel_device_view;
593 optional_dremel_view _r_dremel_device_view;
594 PhysicalElementComparator
const _comparator;
608 size_type const rhs_index)
const noexcept
610 int last_null_depth = cuda::std::numeric_limits<int>::max();
612 for (
size_type i = 0; i < _lhs.num_columns(); ++i) {
613 if (_lhs.column(i).type().id() == type_id::LIST) { ++list_column_index; }
615 int const depth = _depth.has_value() ? (*_depth)[i] : 0;
616 if (depth > last_null_depth) {
continue; }
618 bool const ascending =
619 _column_order.has_value() ? (*_column_order)[i] == order::ASCENDING :
true;
622 _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE;
627 auto const [l_dremel_i, r_dremel_i] =
628 _lhs.column(i).type().id() == type_id::LIST
629 ? cuda::std::make_tuple(optional_dremel_view(_l_dremel[list_column_index]),
630 optional_dremel_view(_r_dremel[list_column_index]))
631 : cuda::std::make_tuple(optional_dremel_view{}, optional_dremel_view{});
643 cuda::std::tie(state, last_null_depth) =
646 if (state == weak_ordering::EQUIVALENT) {
continue; }
650 : (state == weak_ordering::GREATER ? weak_ordering::LESS : weak_ordering::GREATER);
652 return weak_ordering::EQUIVALENT;
660 Nullate
const _check_nulls;
661 cuda::std::optional<device_span<int const>>
const _depth;
662 cuda::std::optional<device_span<order const>>
const _column_order;
663 cuda::std::optional<device_span<null_order const>>
const _null_precedence;
664 PhysicalElementComparator
const _comparator;
678 struct weak_ordering_comparator_impl {
679 static_assert(not((weak_ordering::EQUIVALENT == values) && ...),
680 "weak_ordering_comparator should not be used for pure equality comparisons. The "
681 "`row_equality_comparator` should be used instead");
683 template <
typename LhsType,
typename RhsType>
684 __device__ constexpr
bool operator()(LhsType
const lhs_index,
685 RhsType
const rhs_index)
const noexcept
687 weak_ordering const result = comparator(lhs_index, rhs_index);
688 return ((result == values) || ...);
690 Comparator
const comparator;
699 template <
typename Comparator>
700 struct less_comparator : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS> {
707 : weak_ordering_comparator_impl<Comparator,
weak_ordering::LESS>{comparator}
719 template <
typename Comparator>
721 : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS, weak_ordering::EQUIVALENT> {
791 static std::pair<std::shared_ptr<preprocessed_table>, std::shared_ptr<preprocessed_table>>
create(
821 static std::shared_ptr<preprocessed_table> create(
823 std::vector<int>&& verticalized_col_depths,
824 std::vector<std::unique_ptr<column>>&& transformed_columns,
827 bool has_ranked_children,
860 std::vector<detail::dremel_data>&& dremel_data,
862 std::vector<std::unique_ptr<column>>&& transformed_columns,
863 bool has_ranked_children);
869 std::vector<std::unique_ptr<column>>&& transformed_columns,
870 bool has_ranked_children);
885 [[nodiscard]] cuda::std::optional<device_span<order const>> column_order()
const
887 return _column_order.size() ? cuda::std::optional<device_span<order const>>(_column_order)
888 : cuda::std::nullopt;
898 [[nodiscard]] cuda::std::optional<device_span<null_order const>> null_precedence()
const
900 return _null_precedence.size()
901 ? cuda::std::optional<device_span<null_order const>>(_null_precedence)
902 : cuda::std::nullopt;
913 [[nodiscard]] cuda::std::optional<device_span<int const>> depths()
const
915 return _depths.size() ? cuda::std::optional<device_span<int const>>(_depths)
916 : cuda::std::nullopt;
921 if (_dremel_device_views.has_value()) {
928 template <
typename PhysicalElementComparator>
929 void check_physical_element_comparator()
931 if constexpr (!std::is_same_v<PhysicalElementComparator, sorting_physical_element_comparator>) {
933 "The input table has nested type children and they were transformed using a "
934 "different type of physical element comparator.");
939 table_device_view_owner
const _t;
945 cuda::std::optional<std::vector<detail::dremel_data>> _dremel_data;
946 cuda::std::optional<rmm::device_uvector<detail::dremel_device_view>> _dremel_device_views;
950 std::vector<std::unique_ptr<column>> _transformed_columns;
954 bool const _has_ranked_children;
990 : d_t{preprocessed_table::create(t, column_order, null_precedence, stream)}
1033 template <
bool has_nested_columns,
1036 auto less(Nullate
nullate = {}, PhysicalElementComparator comparator = {})
const
1038 d_t->check_physical_element_comparator<PhysicalElementComparator>();
1040 return less_comparator{
1041 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1045 d_t->dremel_device_views(),
1046 d_t->dremel_device_views(),
1048 d_t->column_order(),
1049 d_t->null_precedence(),
1054 template <
bool has_nested_columns,
1056 typename PhysicalElementComparator = sorting_physical_element_comparator>
1059 d_t->check_physical_element_comparator<PhysicalElementComparator>();
1061 return less_equivalent_comparator{
1062 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1066 d_t->dremel_device_views(),
1067 d_t->dremel_device_views(),
1069 d_t->column_order(),
1070 d_t->null_precedence(),
1075 std::shared_ptr<preprocessed_table> d_t;
1079 template <
typename Comparator>
1080 struct strong_index_comparator_adapter {
1081 strong_index_comparator_adapter(Comparator
const& comparator) : comparator{comparator} {}
1083 __device__ constexpr
weak_ordering operator()(lhs_index_type
const lhs_index,
1084 rhs_index_type
const rhs_index)
const noexcept
1090 __device__ constexpr
weak_ordering operator()(rhs_index_type
const rhs_index,
1091 lhs_index_type
const lhs_index)
const noexcept
1093 auto const left_right_ordering =
1097 if (left_right_ordering == weak_ordering::LESS) {
1098 return weak_ordering::GREATER;
1099 }
else if (left_right_ordering == weak_ordering::GREATER) {
1100 return weak_ordering::LESS;
1102 return weak_ordering::EQUIVALENT;
1105 Comparator
const comparator;
1164 std::shared_ptr<preprocessed_table> right)
1165 : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1205 template <
bool has_nested_columns,
1208 auto less(Nullate
nullate = {}, PhysicalElementComparator comparator = {})
const
1210 d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1211 d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1213 return less_comparator{strong_index_comparator_adapter{
1214 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1218 d_left_table->dremel_device_views(),
1219 d_right_table->dremel_device_views(),
1220 d_left_table->depths(),
1221 d_left_table->column_order(),
1222 d_left_table->null_precedence(),
1229 typename PhysicalElementComparator = sorting_physical_element_comparator>
1232 d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1233 d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1235 return less_equivalent_comparator{strong_index_comparator_adapter{
1236 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1240 d_left_table->dremel_device_views(),
1241 d_right_table->dremel_device_views(),
1242 d_left_table->depths(),
1243 d_left_table->column_order(),
1244 d_left_table->null_precedence(),
1249 std::shared_ptr<preprocessed_table> d_left_table;
1250 std::shared_ptr<preprocessed_table> d_right_table;
1259 namespace equality {
1276 template <
typename Element>
1277 __device__ constexpr
bool operator()(Element
const lhs, Element
const rhs)
const noexcept
1295 template <
typename Element, CUDF_ENABLE_IF(not std::is_
floating_po
int_v<Element>)>
1296 __device__ constexpr
bool operator()(Element
const lhs, Element
const rhs)
const noexcept
1310 template <
typename Element, CUDF_ENABLE_IF(std::is_
floating_po
int_v<Element>)>
1311 __device__ constexpr
bool operator()(Element
const lhs, Element
const rhs)
const noexcept
1313 return isnan(lhs) and isnan(rhs) ? true : lhs == rhs;
1338 template <
bool has_nested_columns,
1340 typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1355 size_type const rhs_index)
const noexcept
1360 element_comparator{check_nulls, l, r, nulls_are_equal, comparator},
1365 return thrust::equal(thrust::seq, lhs.
begin(), lhs.
end(), rhs.
begin(), equal_elements);
1383 PhysicalEqualityComparator comparator = {}) noexcept
1386 check_nulls{check_nulls},
1387 nulls_are_equal{nulls_are_equal},
1388 comparator{comparator}
1395 class element_comparator {
1409 __device__ element_comparator(Nullate check_nulls,
1410 column_device_view lhs,
1411 column_device_view rhs,
1413 PhysicalEqualityComparator comparator = {}) noexcept
1416 check_nulls{check_nulls},
1417 nulls_are_equal{nulls_are_equal},
1418 comparator{comparator}
1430 template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1431 __device__
bool operator()(
size_type const lhs_element_index,
1432 size_type const rhs_element_index)
const noexcept
1435 bool const lhs_is_null{lhs.is_null(lhs_element_index)};
1436 bool const rhs_is_null{rhs.is_null(rhs_element_index)};
1437 if (lhs_is_null and rhs_is_null) {
1438 return nulls_are_equal == null_equality::EQUAL;
1439 }
else if (lhs_is_null != rhs_is_null) {
1444 return comparator(lhs.element<Element>(lhs_element_index),
1445 rhs.element<Element>(rhs_element_index));
1448 template <
typename Element,
1449 CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>() and
1450 (not has_nested_columns or not cudf::is_nested<Element>())),
1452 __device__
bool operator()(Args...)
1454 CUDF_UNREACHABLE(
"Attempted to compare elements of uncomparable types.");
1457 template <typename Element, CUDF_ENABLE_IF(has_nested_columns and cudf::is_nested<Element>())>
1458 __device__
bool operator()(
size_type const lhs_element_index,
1459 size_type const rhs_element_index)
const noexcept
1461 column_device_view lcol = lhs.slice(lhs_element_index, 1);
1462 column_device_view rcol = rhs.slice(rhs_element_index, 1);
1463 while (lcol.type().id() == type_id::STRUCT || lcol.type().id() == type_id::LIST) {
1465 auto lvalid = detail::make_validity_iterator<true>(lcol);
1466 auto rvalid = detail::make_validity_iterator<true>(rcol);
1467 if (nulls_are_equal == null_equality::UNEQUAL) {
1469 thrust::seq, lvalid, lvalid + lcol.size(), thrust::logical_not<bool>()) or
1471 thrust::seq, rvalid, rvalid + rcol.size(), thrust::logical_not<bool>())) {
1475 if (not thrust::equal(thrust::seq, lvalid, lvalid + lcol.size(), rvalid)) {
1480 if (lcol.type().id() == type_id::STRUCT) {
1481 if (lcol.num_child_columns() == 0) {
return true; }
1483 lcol = detail::structs_column_device_view(lcol).get_sliced_child(0);
1484 rcol = detail::structs_column_device_view(rcol).get_sliced_child(0);
1485 }
else if (lcol.type().id() == type_id::LIST) {
1486 auto l_list_col = detail::lists_column_device_view(lcol);
1487 auto r_list_col = detail::lists_column_device_view(rcol);
1491 if (not thrust::equal(thrust::seq, lsizes, lsizes + lcol.size(), rsizes)) {
1495 lcol = l_list_col.get_sliced_child();
1496 rcol = r_list_col.get_sliced_child();
1497 if (lcol.size() != rcol.size()) {
return false; }
1501 auto comp = column_comparator{
1502 element_comparator{check_nulls, lcol, rcol, nulls_are_equal, comparator}, lcol.size()};
1503 return type_dispatcher<dispatch_void_if_nested>(lcol.type(), comp);
1514 struct column_comparator {
1515 element_comparator
const comp;
1523 template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1524 __device__
bool operator()()
const noexcept
1526 return thrust::all_of(thrust::seq,
1527 thrust::make_counting_iterator(0),
1528 thrust::make_counting_iterator(0) + size,
1529 [=](
auto i) {
return comp.template operator()<Element>(i, i); });
1532 template <
typename Element,
1533 CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>()),
1535 __device__
bool operator()(Args...) const noexcept
1537 CUDF_UNREACHABLE(
"Attempted to compare elements of uncomparable types.");
1541 column_device_view
const lhs;
1542 column_device_view
const rhs;
1543 Nullate
const check_nulls;
1545 PhysicalEqualityComparator
const comparator;
1548 table_device_view
const lhs;
1549 table_device_view
const rhs;
1550 Nullate
const check_nulls;
1552 PhysicalEqualityComparator
const comparator;
1580 using table_device_view_owner =
1584 std::vector<rmm::device_buffer>&& null_buffers,
1585 std::vector<std::unique_ptr<column>>&& tmp_columns)
1586 : _t(std::move(
table)),
1587 _null_buffers(std::move(null_buffers)),
1588 _tmp_columns(std::move(tmp_columns))
1599 table_device_view_owner _t;
1600 std::vector<rmm::device_buffer> _null_buffers;
1601 std::vector<std::unique_ptr<column>> _tmp_columns;
1659 template <
bool has_nested_columns,
1664 PhysicalEqualityComparator comparator = {})
const noexcept
1666 return device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>{
1667 nullate, *d_t, *d_t, nulls_are_equal, comparator};
1671 std::shared_ptr<preprocessed_table> d_t;
1675 template <
typename Comparator>
1676 struct strong_index_comparator_adapter {
1677 strong_index_comparator_adapter(Comparator
const& comparator) : comparator{comparator} {}
1679 __device__ constexpr
bool operator()(lhs_index_type
const lhs_index,
1680 rhs_index_type
const rhs_index)
const noexcept
1686 __device__ constexpr
bool operator()(rhs_index_type
const rhs_index,
1687 lhs_index_type
const lhs_index)
const noexcept
1689 return this->operator()(lhs_index, rhs_index);
1692 Comparator
const comparator;
1738 std::shared_ptr<preprocessed_table> right)
1739 : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1773 template <
bool has_nested_columns,
1778 PhysicalEqualityComparator comparator = {})
const noexcept
1780 return strong_index_comparator_adapter{
1781 device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>(
1782 nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)};
1786 std::shared_ptr<preprocessed_table> d_left_table;
1787 std::shared_ptr<preprocessed_table> d_right_table;
1800 template <
template <
typename>
class hash_function,
typename Nullate>
1812 uint32_t seed = DEFAULT_HASH_SEED,
1813 hash_value_type null_hash = cuda::std::numeric_limits<hash_value_type>::max()) noexcept
1814 : _check_nulls(nulls), _seed(seed), _null_hash(null_hash)
1826 template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
1830 if (_check_nulls && col.
is_null(row_index)) {
return _null_hash; }
1831 return hash_function<T>{_seed}(col.
element<T>(row_index));
1842 template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
1846 CUDF_UNREACHABLE(
"Unsupported type in hash.");
1860 template <
template <
typename>
class hash_function,
typename Nullate>
1873 auto it = thrust::make_transform_iterator(_table.begin(), [=](
auto const&
column) {
1874 return cudf::type_dispatcher<dispatch_storage_type>(
1876 element_hasher_adapter<hash_function>{_check_nulls, _seed},
1882 return detail::accumulate(it, it + _table.num_columns(), _seed, [](
auto hash,
auto h) {
1883 return cudf::hashing::detail::hash_combine(hash, h);
1895 template <
template <
typename>
class hash_fn>
1896 class element_hasher_adapter {
1897 static constexpr
hash_value_type NULL_HASH = cuda::std::numeric_limits<hash_value_type>::max();
1901 __device__ element_hasher_adapter(Nullate check_nulls, uint32_t seed) noexcept
1902 : _element_hasher(check_nulls, seed), _check_nulls(check_nulls)
1906 template <typename T, CUDF_ENABLE_IF(not cudf::is_nested<T>())>
1910 return _element_hasher.template operator()<T>(col, row_index);
1913 template <typename T, CUDF_ENABLE_IF(cudf::is_nested<T>())>
1918 column_device_view curr_col = col.
slice(row_index, 1);
1919 while (curr_col.type().id() == type_id::STRUCT || curr_col.type().id() == type_id::LIST) {
1921 auto validity_it = detail::make_validity_iterator<true>(curr_col);
1922 hash = detail::accumulate(
1923 validity_it, validity_it + curr_col.size(), hash, [](
auto hash,
auto is_valid) {
1924 return cudf::hashing::detail::hash_combine(hash,
1925 is_valid ? NON_NULL_HASH : NULL_HASH);
1928 if (curr_col.type().id() == type_id::STRUCT) {
1929 if (curr_col.num_child_columns() == 0) {
return hash; }
1931 curr_col = detail::structs_column_device_view(curr_col).get_sliced_child(0);
1932 }
else if (curr_col.type().id() == type_id::LIST) {
1933 auto list_col = detail::lists_column_device_view(curr_col);
1935 hash = detail::accumulate(
1936 list_sizes, list_sizes + list_col.size(), hash, [](
auto hash,
auto size) {
1937 return cudf::hashing::detail::hash_combine(hash, hash_fn<size_type>{}(size));
1939 curr_col = list_col.get_sliced_child();
1942 for (
int i = 0; i < curr_col.size(); ++i) {
1943 hash = cudf::hashing::detail::hash_combine(
1945 type_dispatcher<dispatch_void_if_nested>(curr_col.type(), _element_hasher, curr_col, i));
1950 element_hasher<hash_fn, Nullate>
const _element_hasher;
1951 Nullate
const _check_nulls;
1955 table_device_view t,
1956 uint32_t seed = DEFAULT_HASH_SEED) noexcept
1957 : _check_nulls{check_nulls}, _table{t}, _seed(seed)
1961 Nullate
const _check_nulls;
1962 table_device_view
const _table;
1963 uint32_t
const _seed;
1969 using preprocessed_table = row::equality::preprocessed_table;
1998 row_hasher(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
2012 template <
template <
typename>
class hash_function = cudf::hashing::detail::default_hash,
2013 template <
template <
typename>
class,
typename>
2017 uint32_t seed = DEFAULT_HASH_SEED)
const
2019 return DeviceRowHasher<hash_function, Nullate>(
nullate, *d_t, seed);
2023 std::shared_ptr<preprocessed_table> d_t;
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
T element(size_type element_index) const noexcept
Returns reference to element at the specified index.
CUDF_HOST_DEVICE column_device_view slice(size_type offset, size_type size) const noexcept
Get a new column_device_view which is a slice of this column.
CUDF_HOST_DEVICE size_type num_child_columns() const noexcept
Returns the number of child columns.
const_iterator< T > begin() const
Return an iterator to the first element of the column.
const_iterator< T > end() const
Returns an iterator to the element following the last element of the column.
A container of nullable device data as a column of elements.
constexpr CUDF_HOST_DEVICE type_id id() const noexcept
Returns the type identifier.
CUDF_HOST_DEVICE data_type type() const noexcept
Returns the element type.
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
CUDF_HOST_DEVICE bool nullable() const noexcept
Indicates whether the column can contain null elements, i.e., if it has an allocated bitmask.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child() const
Fetches the child column of the underlying list column with offset and size applied.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child(size_type idx) const
Fetches the child column of the underlying struct column.
Computes the equality comparison between 2 rows.
constexpr bool operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table is equal to the row at rhs_index in the rhs tabl...
Comparator for performing equality comparisons between two rows of the same table.
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing equality comparisons between two rows of the same table.
self_comparator(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows of the same table.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Get the comparison operator to use on the device.
An owning object that can be used to equality compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing equality comparisons between two rows from two tables.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows from two tables.
Computes the hash value of a row in the given table.
auto operator()(size_type row_index) const noexcept
Return the hash value of a row in the given table.
Computes the hash value of an element in the given column.
uint32_t _seed
The seed to use for hashing.
element_hasher(Nullate nulls, uint32_t seed=DEFAULT_HASH_SEED, hash_value_type null_hash=cuda::std::numeric_limits< hash_value_type >::max()) noexcept
Constructs an element_hasher object.
Nullate _check_nulls
Whether to check for nulls.
hash_value_type _null_hash
Hash value to use for null elements.
Computes the hash value of a row in the given table.
DeviceRowHasher< hash_function, Nullate > device_hasher(Nullate nullate={}, uint32_t seed=DEFAULT_HASH_SEED) const
Get the hash operator to use on the device.
row_hasher(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for hashing the rows of a table.
row_hasher(std::shared_ptr< preprocessed_table > t)
Construct an owning object for hashing the rows of a table from an existing preprocessed_table.
Performs a relational comparison between two elements in two columns.
cuda::std::pair< weak_ordering, int > operator()(size_type lhs_element_index, size_type rhs_element_index)
Compares two list-type columns.
cuda::std::pair< weak_ordering, int > operator()(size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
Performs a relational comparison between the specified elements.
cuda::std::pair< weak_ordering, int > operator()(size_type const, size_type const) const noexcept
Throws run-time error when columns types cannot be compared or if this class is instantiated with has...
element_comparator(Nullate check_nulls, column_device_view lhs, column_device_view rhs, null_order null_precedence=null_order::BEFORE, int depth=0, PhysicalElementComparator comparator={}, optional_dremel_view l_dremel_device_view={}, optional_dremel_view r_dremel_device_view={})
Construct type-dispatched function object for performing a relational comparison between two elements...
Computes the lexicographic comparison between 2 rows.
constexpr weak_ordering operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less,...
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, cuda::std::optional< device_span< order const >> column_order=cuda::std::nullopt, cuda::std::optional< device_span< null_order const >> null_precedence=cuda::std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables....
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, device_span< detail::dremel_device_view const > l_dremel_device_views, device_span< detail::dremel_device_view const > r_dremel_device_views, cuda::std::optional< device_span< int const >> depth=cuda::std::nullopt, cuda::std::optional< device_span< order const >> column_order=cuda::std::nullopt, cuda::std::optional< device_span< null_order const >> null_precedence=cuda::std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables.
An owning object that can be used to lexicographically compare two rows of the same table.
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
self_comparator(table_view const &t, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between two rows of the same tab...
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
An owning object that can be used to lexicographically compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between rows of two different ta...
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
Table device view that is usable in device memory.
A set of cudf::column_view's of the same size.
A set of cudf::column's of the same size.
Column device view class definitions.
uint32_t hash_value_type
Type of hash value.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
null_order
Indicates how null values compare against all other values.
null_equality
Enum to consider two nulls as equal or unequal.
int32_t size_type
Row index type for columns and tables.
#define CUDF_ENABLE_IF(...)
Convenience macro for SFINAE as an unnamed template parameter.
weak_ordering
Result type of the element_relational_comparator function object.
@ EQUIVALENT
Indicates a is ordered neither before nor after b
auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Compare the nulls according to null order.
CUDF_HOST_DEVICE auto make_list_size_iterator(detail::lists_column_device_view const &c)
Makes an iterator that returns size of the list by row index.
Column APIs for sort and rank.
Device version of C++20 std::span with reduced feature set.
A map from cudf::type_id to cudf type that excludes LIST and STRUCT types.
std::conditional_t< t==type_id::STRUCT or t==type_id::LIST, void, id_to_type< t > > type
The type to dispatch to if the type is nested.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparison of non-floating point values.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparisons.
Preprocessed table for use with row equality comparison or row hashing.
static std::shared_ptr< preprocessed_table > create(table_view const &table, rmm::cuda_stream_view stream)
Factory to construct preprocessed_table for use with row equality comparison or row hashing.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS m...
less_comparator(Comparator const &comparator)
Constructs a less_comparator.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS o...
less_equivalent_comparator(Comparator const &comparator)
Constructs a less_equivalent_comparator.
Computes a weak ordering of two values with special sorting behavior.
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparisons.
Preprocessed table for use with lexicographical comparison.
std::invoke_result_t< decltype(table_device_view::create), table_view, rmm::cuda_stream_view > table_device_view_owner
Type of table device view owner for the preprocessed table.
static std::pair< std::shared_ptr< preprocessed_table >, std::shared_ptr< preprocessed_table > > create(table_view const &lhs, table_view const &rhs, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess tables for use with lexicographical comparison.
static std::shared_ptr< preprocessed_table > create(table_view const &table, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess table for use with lexicographical comparison.
Relational comparator functor that compares physical values rather than logical elements like lists,...
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparison of non-floating point values.
A counting iterator that uses strongly typed indices bound to tables.
thrust::iterator_adaptor< strong_index_iterator< Index >, Index > super_t
The base class.
constexpr strong_index_iterator(Underlying n)
Constructs a strong index iterator.
C++20 std::span with reduced feature set.
Indicates the presence of nulls at compile-time or runtime.
Table device view class definitions.
bool has_nested_columns(table_view const &table)
Determine if any nested columns exist in a given table.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.