11 #include <cudf/utilities/export.hpp>
15 #include <cuda/std/span>
21 namespace CUDF_EXPORT
cudf {
24 namespace hashing::detail {
25 template <
typename Key>
30 template <
template <
typename>
class Hasher>
92 std::int32_t precision = 12,
94 nan_policy nan_handling = nan_policy::NAN_IS_NULL,
116 std::int32_t precision,
118 nan_policy nan_handling = nan_policy::NAN_IS_NULL,
169 void merge(cuda::std::span<cuda::std::byte> sketch_span,
190 [[nodiscard]] cuda::std::span<cuda::std::byte>
sketch() noexcept;
201 [[nodiscard]] cuda::std::span<cuda::std::
byte const> sketch() const noexcept;
222 [[nodiscard]] std::int32_t precision() const noexcept;
Object-oriented HyperLogLog sketch for approximate distinct counting.
std::size_t estimate(rmm::cuda_stream_view stream=cudf::get_default_stream()) const
Estimates the approximate number of distinct rows in the sketch.
void merge(cuda::std::span< cuda::std::byte > sketch_span, rmm::cuda_stream_view stream=cudf::get_default_stream())
Merges a sketch from raw bytes into this sketch.
void merge(approx_distinct_count const &other, rmm::cuda_stream_view stream=cudf::get_default_stream())
Merges another sketch into this sketch.
approx_distinct_count(approx_distinct_count &&)=default
Default move constructor.
approx_distinct_count & operator=(approx_distinct_count &&)=default
Move assignment operator.
approx_distinct_count(cuda::std::span< cuda::std::byte > sketch_span, std::int32_t precision, null_policy null_handling=null_policy::EXCLUDE, nan_policy nan_handling=nan_policy::NAN_IS_NULL, rmm::cuda_stream_view stream=cudf::get_default_stream())
Constructs an approximate distinct count sketch from serialized sketch bytes.
cuda::std::span< cuda::std::byte > sketch() noexcept
Gets the raw sketch bytes for serialization or external merging.
void add(table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream())
Adds rows from a table to the sketch.
approx_distinct_count(table_view const &input, std::int32_t precision=12, null_policy null_handling=null_policy::EXCLUDE, nan_policy nan_handling=nan_policy::NAN_IS_NULL, rmm::cuda_stream_view stream=cudf::get_default_stream())
Constructs an approximate distinct count sketch from a table.
A set of cudf::column_view objects of the same size.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
null_policy
Enum to specify whether to include nulls or exclude nulls.
nan_policy
Enum to specify whether a NaN floating-point value is treated as a null or a non-null element.
Class definitions for table_view and mutable_table_view.
Type declarations for libcudf.